RubyGems - rika - Versions diffs - 0.9.3-java → 0.9.4-java - Mend

rika 0.9.3-java → 0.9.4-java

This diff shows the changes between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (6)

data/README.md CHANGED

@@ -31,6 +31,10 @@ Something like this:
 	# Return the content of the document:
 	parser.content
+	# Return the media type for the document:
+	parser.media_type
+	=> "application/pdf"
 	# Return the metadata field title if it exists:
 	parser.metadata["title"] if parser.metadata_exists?("title")
@@ -42,7 +46,7 @@ Something like this:
 	parser.content # 10000 first chars returned
 	# Return content from URL
-	parser = Rika::Parser.new('http://www.exampleurl.com/example.pdf')
+	parser = Rika::Parser.new('http://riakhandbook.com/sample.pdf', 200)
 	parser.content
 ```
 ## Contributing

data/lib/rika.rb CHANGED

@@ -46,6 +46,10 @@ module Rika
       metadata_hash
     end
+    def media_type
+      @media_type
+    end
     def available_metadata
       @metadata.names.to_a
     end
@@ -59,6 +63,7 @@ module Rika
     def parse_file
       input_stream = java.io.FileInputStream.new(java.io.File.new(@uri))
       @metadata.set("filename", File.basename(@uri))
+      @media_type = @tika.detect(java.io.FileInputStream.new(java.io.File.new(@uri)))
       @content = @tika.parse_to_string(input_stream, @metadata)
     end
@@ -66,6 +71,7 @@ module Rika
       raise IOError, "File does not exist or can't be reached." if not Net::HTTP.get_response(URI(@uri)).is_a?(Net::HTTPSuccess)
       url = java.net.URL.new(@uri)
       input_stream = url.open_stream
+      @media_type = @tika.detect(url.open_stream)
       @metadata.set("url", @uri)
       @content = @tika.parse_to_string(input_stream, @metadata)
     end

data/lib/rika/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Rika
-  VERSION = "0.9.3"
+  VERSION = "0.9.4"
 end

data/spec/fixtures/unknown.bin ADDED

Binary file

data/spec/rika_spec.rb CHANGED

@@ -11,10 +11,11 @@ describe Rika::Parser do
     @docx_parser = Rika::Parser.new(file_path("document.docx"))
     @pdf_parser = Rika::Parser.new(file_path("document.pdf"))
     @image_parser = Rika::Parser.new(file_path("image.jpg"))
+    @unknown_parser = Rika::Parser.new(file_path("unknown.bin"))
     @dir = File.expand_path(File.join(File.dirname(__FILE__), 'fixtures'))
     port = 50505
     @url = "http://#{Socket.gethostname}:#{port}"
+    @qoute = "First they ignore you, then they ridicule you, then they fight you, then you win."
     @t1 = Thread.new do
       @server = HTTPServer.new(:Port => port, :DocumentRoot => @dir,
       :AccessLog => [], :Logger => WEBrick::Log::new("/dev/null", 7))
@@ -41,15 +42,15 @@ describe Rika::Parser do
   describe '#content' do
     it "should return the content in a text file" do
-      @txt_parser.content.strip.should == "First they ignore you, then they ridicule you, then they fight you, then you win."
+      @txt_parser.content.strip.should == @qoute
     end
     it "should return the content in a docx file" do
-      @docx_parser.content.should == "First they ignore you, then they ridicule you, then they fight you, then you win."
+      @docx_parser.content.should == @qoute
     end
     it "should return the content in a pdf file" do
-      @pdf_parser.content.should == "First they ignore you, then they ridicule you, then they fight you, then you win."
+      @pdf_parser.content.should == @qoute
     end
     it "should return no content for an image" do
@@ -73,7 +74,11 @@ describe Rika::Parser do
     it "should return the content from a file over http" do
       parser = Rika::Parser.new(@url + "/document.pdf")
-      parser.content.should == "First they ignore you, then they ridicule you, then they fight you, then you win."
+      parser.content.should == @qoute
+    end
+    it "should return empty string for unknown file" do
+      @unknown_parser.content.should be_empty
     end
   end
@@ -127,4 +132,23 @@ describe Rika::Parser do
       @docx_parser.metadata_exists?("title").should == true
     end
   end
+  describe '#media_type' do
+    it "should return application/pdf for a pdf file" do
+      @pdf_parser.media_type.should == "application/pdf"
+    end
+    it "should return text/plain for a txt file" do
+      @txt_parser.media_type.should == "text/plain"
+    end
+    it "should return application/pdf for a pdf over http" do
+      parser = Rika::Parser.new(@url + "/document.pdf")
+      parser.media_type.should == "application/pdf"
+    end
+    it "should return application/octet-stream for unknown file" do
+      @unknown_parser.media_type.should == "application/octet-stream"
+    end
+  end
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: rika
 version: !ruby/object:Gem::Version
-  version: 0.9.3
+  version: 0.9.4
   prerelease:
 platform: java
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-09-23 00:00:00.000000000 Z
+date: 2012-09-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
@@ -54,6 +54,7 @@ files:
 - spec/fixtures/over_100k_file.txt
 - spec/fixtures/text_file.txt
 - spec/fixtures/text_file_without_extension
+- spec/fixtures/unknown.bin
 - spec/rika_spec.rb
 - spec/spec_helper.rb
 - target/dependency/apache-mime4j-core-0.7.2.jar
@@ -124,5 +125,6 @@ test_files:
 - spec/fixtures/over_100k_file.txt
 - spec/fixtures/text_file.txt
 - spec/fixtures/text_file_without_extension
+- spec/fixtures/unknown.bin
 - spec/rika_spec.rb
 - spec/spec_helper.rb