RubyGems - rika - Versions diffs - 1.0.0-java → 1.1.1-java - Mend

rika 1.0.0-java → 1.1.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

data/.gitignore CHANGED

@@ -17,3 +17,5 @@ test/version_tmp
 tmp
 .DS_Store
+projectFilesBackup/
+.idea/

data/README.md CHANGED

@@ -25,42 +25,60 @@ Or install it yourself as:
 ## Usage
-Something like this:
+For a quick start with the simplest use cases, the following functions
+are provided to get what you need in a single function call, for your convenience:
+```ruby
+require 'rika'
+content           = Rika.parse_content('document.pdf')    # string containing all content text
+metadata          = Rika.parse_metadata('document.pdf')   # hash containing the document metadata
+content, metadata = Rika.parse_content_and_metadata('document.pdf')   # both of the above
+```
+For other use cases and finer control, you can work directly with the Rika::Parser object:
 ```ruby
-	require 'rika'
+require 'rika'
-	parser = Rika::Parser.new('document.pdf')
+parser = Rika::Parser.new('document.pdf')
-	# Return the content of the document:
-	parser.content
+# Return the content of the document:
+parser.content
-	# Return the media type for the document:
-	parser.media_type
-	=> "application/pdf"
+# Return the media type for the document:
+parser.media_type
+=> "application/pdf"
-	# Return the metadata field title if it exists:
-	parser.metadata["title"] if parser.metadata_exists?("title")
+# Return the metadata field title if it exists:
+parser.metadata["title"] if parser.metadata_exists?("title")
-	# Return all the available metadata keys that can be read from the document
-	parser.available_metadata
+# Return all the available metadata keys that can be read from the document
+parser.available_metadata
-	# Return only the first 10000 chars of the content:
-	parser = Rika::Parser.new('document.pdf', 10000)
-	parser.content # 10000 first chars returned
+# Return only the first 10000 chars of the content:
+parser = Rika::Parser.new('document.pdf', 10000)
+parser.content # 10000 first chars returned
-	# Return content from URL
-	parser = Rika::Parser.new('http://riakhandbook.com/sample.pdf', 200)
-	parser.content
+# Return content from URL
+parser = Rika::Parser.new('http://riakhandbook.com/sample.pdf', 200)
+parser.content
-	# Return the language for the content
-	parser = parser = Rika::Parser.new('german document.pdf')
-	parser.language
-	=> "de"
+# Return the language for the content
+parser = parser = Rika::Parser.new('german document.pdf')
+parser.language
+=> "de"
-	# Check whether the langugage identification is certain enough to be trusted
-	parser.language_is_reasonably_certain?
+# Check whether the language identification is certain enough to be trusted
+parser.language_is_reasonably_certain?
 ```
+## Credits
+The following people have contributed ideas, documentation, or code to Rika:
+* Keith Bennett
+* Richard Nyström
 ## Contributing
 1. Fork it

data/lib/rika.rb CHANGED

@@ -18,7 +18,23 @@ module Rika
   import org.apache.tika.language.LanguageIdentifier
   import java.io.FileInputStream
   import java.net.URL
+  def self.parse_content_and_metadata(file_location, max_content_length = -1)
+    parser = Parser.new(file_location, max_content_length)
+    [parser.content, parser.metadata]
+  end
+  def self.parse_content(file_location, max_content_length = -1)
+    parser = Parser.new(file_location, max_content_length)
+    parser.content
+  end
+  def self.parse_metadata(file_location)
+    parser = Parser.new(file_location, 0)
+    parser.metadata
+  end
   class Parser
     def initialize(file_location, max_content_length = -1)

data/lib/rika/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Rika
-  VERSION = "1.0.0"
+  VERSION = "1.1.1"
 end

data/spec/rika_spec.rb CHANGED

@@ -21,6 +21,7 @@ describe Rika::Parser do
       :AccessLog => [], :Logger => WEBrick::Log::new("/dev/null", 7))
       @server.start
     end
+    @sample_pdf_filespec = file_path("document.pdf")
   end
   after(:all) do
@@ -173,4 +174,20 @@ describe Rika::Parser do
       lang.language_is_reasonably_certain? == true
     end
   end
-end
+  it "should return valid content using Rika.parse_content" do
+    content = Rika.parse_content(@sample_pdf_filespec)
+    (content.should be_a(String)) && (content.should_not be_empty)
+  end
+  it "should return valid metadata using Rika.parse_metadata" do
+    metadata = Rika.parse_metadata(@sample_pdf_filespec)
+    (metadata.should be_a(Hash)) && (metadata.should_not be_empty)
+  end
+  it "should return valid content and metadata using Rika.parse_content_and_metadata" do
+    content, metadata = Rika.parse_content_and_metadata(@sample_pdf_filespec)
+    (content.should be_a(String)) && (content.should_not be_empty) && \
+      (metadata.should be_a(Hash)) && (metadata.should_not be_empty)
+  end
+end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: rika
 version: !ruby/object:Gem::Version
-  version: 1.0.0
+  version: 1.1.1
   prerelease:
 platform: java
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-02-23 00:00:00.000000000 Z
+date: 2013-02-26 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
@@ -43,17 +43,17 @@ dependencies:
     none: false
   prerelease: false
   type: :development
-description: ! ' A JRuby wrapper for Apache Tika to extract text and metadata from
-  various file formats. '
+description: " A JRuby wrapper for Apache Tika to extract text and metadata from various\
+  \ file formats. "
 email:
 - ricny046@gmail.com
 executables: []
 extensions: []
 extra_rdoc_files: []
 files:
-- .gitignore
-- .rspec
-- .travis.yml
+- ".gitignore"
+- ".rspec"
+- ".travis.yml"
 - Gemfile
 - LICENSE.txt
 - README.md
@@ -123,14 +123,14 @@ require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
-  - - ! '>='
+  - - ">="
     - !ruby/object:Gem::Version
       version: !binary |-
         MA==
   none: false
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - ! '>='
+  - - ">="
     - !ruby/object:Gem::Version
       version: !binary |-
         MA==