RubyGems - textractor - Versions diffs - 0.0.1 → 0.0.2 - Mend

textractor 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

data/README.md CHANGED Viewed

@@ -1,21 +1,23 @@
 # textractor
-textractor is a ruby library that provides a simple wrapper for
-extracting text from PDF and Word documents.
+textractor is a ruby library that provides a simple wrapper around CLI
+tools for extracting text from PDF and Word documents.
 ## Setup
+    gem install textractor
 In order to use textractor you have to install a few command line
 tools.
 ### OS X
-    port install wv pdftohtml links
+    port install wv xpdf links
 I also recommend passing +no_x11 to the install command, but
 this may not work on all systems due to dependency issues.
-    port install wv pdftohtml links +no_x11
+    port install wv xpdf links +no_x11
 ### Ubuntu 8.04

data/Rakefile CHANGED Viewed

@@ -5,12 +5,12 @@ begin
   require 'jeweler'
   Jeweler::Tasks.new do |gem|
     gem.name = "textractor"
-    gem.summary = %Q{simple wrapper for extracting text from PDF and Word documents}
-    gem.description = %Q{simple wrapper for extracting text from PDF and Word documents}
+    gem.summary = %Q{simple wrapper around CLI tools for extracting text from PDF and Word documents}
+    gem.description = %Q{simple wrapper around CLI for extracting text from PDF and Word documents}
     gem.email = "mguterl@gmail.com"
     gem.homepage = "http://github.com/mguterl/textractor"
     gem.authors = ["Michael Guterl"]
-    gem.add_development_dependency "rspec", ">= 1.2.9"
+    gem.add_development_dependency "rspec", ">= 1.3.0"
     # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
   end
   Jeweler::GemcutterTasks.new

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.0.1
1	+ 0.0.2

data/lib/textractor/document.rb CHANGED Viewed

@@ -7,6 +7,7 @@ module Textractor
       'application/x-pdf' => :pdf,
       'application/doc'   => :word,
       'application/x-doc' => :word,
+      'text/plain'        => :txt
     }
     attr_reader :filename
@@ -27,6 +28,8 @@ module Textractor
         :pdf
       when /doc/
         :word
+      when /txt/
+        :txt
       else
         nil
       end
@@ -46,6 +49,10 @@ module Textractor
       `wvWare -c utf-8 --nographics -x #{Textractor.wvText_path} #{filename} 2>/dev/null`.strip
     end
+    def extract_from_txt
+      File.read(filename)
+    end
   end
 end

data/spec/document_spec.rb CHANGED Viewed

@@ -4,6 +4,7 @@ describe Textractor::Document do
   PDF_DOCUMENT_FIXTURE  = File.expand_path(File.dirname(__FILE__) + "/fixtures/document.pdf")
   WORD_DOCUMENT_FIXTURE = File.expand_path(File.dirname(__FILE__) + "/fixtures/document.doc")
+  TXT_DOCUMENT_FIXTURE  = File.expand_path(File.dirname(__FILE__) + "/fixtures/document.txt")
   it 'should require a filename to create' do
     expect { Textractor::Document.new }.to raise_error(ArgumentError)
@@ -30,6 +31,15 @@ describe Textractor::Document do
     end
+    describe "with txt document" do
+      it 'should extract the text from the document' do
+        @doc = Textractor::Document.new(TXT_DOCUMENT_FIXTURE)
+        @doc.text.should == "Ruby on rails developer"
+      end
+    end
   end
   describe "#type" do

data/spec/fixtures/document.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ Ruby on rails developer

data/textractor.gemspec ADDED Viewed

@@ -0,0 +1,62 @@
+# Generated by jeweler
+# DO NOT EDIT THIS FILE DIRECTLY
+# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
+# -*- encoding: utf-8 -*-
+Gem::Specification.new do |s|
+  s.name = %q{textractor}
+  s.version = "0.0.2"
+  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
+  s.authors = ["Michael Guterl"]
+  s.date = %q{2010-07-26}
+  s.description = %q{simple wrapper around CLI for extracting text from PDF and Word documents}
+  s.email = %q{mguterl@gmail.com}
+  s.extra_rdoc_files = [
+    "LICENSE",
+     "README.md"
+  ]
+  s.files = [
+    ".document",
+     ".gitignore",
+     "LICENSE",
+     "README.md",
+     "Rakefile",
+     "VERSION",
+     "lib/textractor.rb",
+     "lib/textractor/document.rb",
+     "spec/document_spec.rb",
+     "spec/fixtures/document.doc",
+     "spec/fixtures/document.pdf",
+     "spec/fixtures/document.txt",
+     "spec/spec.opts",
+     "spec/spec_helper.rb",
+     "spec/textractor_spec.rb",
+     "support/wvText.xml",
+     "textractor.gemspec"
+  ]
+  s.homepage = %q{http://github.com/mguterl/textractor}
+  s.rdoc_options = ["--charset=UTF-8"]
+  s.require_paths = ["lib"]
+  s.rubygems_version = %q{1.3.7}
+  s.summary = %q{simple wrapper around CLI tools for extracting text from PDF and Word documents}
+  s.test_files = [
+    "spec/document_spec.rb",
+     "spec/spec_helper.rb",
+     "spec/textractor_spec.rb"
+  ]
+  if s.respond_to? :specification_version then
+    current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
+    s.specification_version = 3
+    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
+      s.add_development_dependency(%q<rspec>, [">= 1.3.0"])
+    else
+      s.add_dependency(%q<rspec>, [">= 1.3.0"])
+    end
+  else
+    s.add_dependency(%q<rspec>, [">= 1.3.0"])
+  end
+end

metadata CHANGED Viewed

@@ -1,12 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: textractor
 version: !ruby/object:Gem::Version
+  hash: 27
   prerelease: false
   segments:
   - 0
   - 0
-  - 1
-  version: 0.0.1
+  - 2
+  version: 0.0.2
 platform: ruby
 authors:
 - Michael Guterl
@@ -14,24 +15,26 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-04-20 00:00:00 -04:00
+date: 2010-07-26 00:00:00 -04:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
   prerelease: false
   requirement: &id001 !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
+        hash: 27
         segments:
         - 1
-        - 2
-        - 9
-        version: 1.2.9
+        - 3
+        - 0
+        version: 1.3.0
   type: :development
   version_requirements: *id001
-description: simple wrapper for extracting text from PDF and Word documents
+description: simple wrapper around CLI for extracting text from PDF and Word documents
 email: mguterl@gmail.com
 executables: []
@@ -52,10 +55,12 @@ files:
 - spec/document_spec.rb
 - spec/fixtures/document.doc
 - spec/fixtures/document.pdf
+- spec/fixtures/document.txt
 - spec/spec.opts
 - spec/spec_helper.rb
 - spec/textractor_spec.rb
 - support/wvText.xml
+- textractor.gemspec
 has_rdoc: true
 homepage: http://github.com/mguterl/textractor
 licenses: []
@@ -66,26 +71,30 @@ rdoc_options:
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
+      hash: 3
       segments:
       - 0
       version: "0"
 required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
+      hash: 3
       segments:
       - 0
       version: "0"
 requirements: []
 rubyforge_project:
-rubygems_version: 1.3.6
+rubygems_version: 1.3.7
 signing_key:
 specification_version: 3
-summary: simple wrapper for extracting text from PDF and Word documents
+summary: simple wrapper around CLI tools for extracting text from PDF and Word documents
 test_files:
 - spec/document_spec.rb
 - spec/spec_helper.rb