moro-delta_attack 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ChangeLog ADDED
@@ -0,0 +1,4 @@
1
+ == 0.0.1 / 2008-09-12
2
+
3
+ * initial release
4
+
data/NOTICE ADDED
@@ -0,0 +1,5 @@
1
+ This library depends on Apache POI libraries,
2
+ which are provided under the Apache License, Version 2.0.
3
+
4
+ http://poi.apache.org/
5
+
data/README ADDED
@@ -0,0 +1,50 @@
1
+
2
+ = delta_attack
3
+
4
+
5
+ == Description
6
+
7
+ Extract MS Office files to plain text.
8
+
9
+ == Installation
10
+
11
+
12
+ === Archive Installation
13
+
14
+ $ rake install
15
+
16
+ === Gem Installation
17
+
18
+ $ gem source -a http://gems.github.com
19
+ $ gem install moro-delta_attack
20
+
21
+ == Features/Problems
22
+
23
+ Extract MS Office files to plain text using Apache POI and JRuby.
24
+ It works with Client/Server architecture.
25
+
26
+ The extraction server runs on JRuby, but the client works with
27
+ both CRuby and JRuby.
28
+
29
+ This library was originally written to index Office documents into a
30
+ full-text search engine.
31
+
32
+ == Synopsis
33
+
34
+ First, start the DeltaAttack server, which needs JRuby and Apache POI:
35
+
36
+ $ export CLASSPATH=path/to/poi-3.1-FINAL/poi-3.1-FINAL-20080629.jar:\
37
+ path/to/poi-3.1-FINAL/poi-scratchpad-3.1-FINAL-20080629.jar
38
+ $ jruby bin/delta_attack_server
39
+
40
+ Then you can use DeltaAttack::Client, in both CRuby(MRI) and JRuby.
41
+
42
+ require 'delta_attack/client'
43
+ DeltaAttack::Client.cast("path/to/some.xls")
44
+
45
+ == Copyright
46
+
47
+ Author:: moro <moronatural@gmail.com>
48
+ Copyright:: Copyright (c) 2008 moro
49
+ License:: MIT
50
+
data/Rakefile ADDED
@@ -0,0 +1,139 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rake/clean'
4
+ require 'rake/testtask'
5
+ require 'rake/packagetask'
6
+ require 'rake/gempackagetask'
7
+ require 'rake/rdoctask'
8
+ require 'rake/contrib/rubyforgepublisher'
9
+ require 'rake/contrib/sshpublisher'
10
+ require 'lib/delta_attack'
11
+ require 'spec/rake/spectask'
12
+ require 'fileutils'
13
+ include FileUtils
14
+
15
# Gem identity and packaging settings shared by the tasks in this Rakefile.
NAME        = "delta_attack"
AUTHOR      = "MOROHASHI Kyosuke"
EMAIL       = "moronatural@gmail.com"
DESCRIPTION = "extract text from MS Office document with Apache POI"
# RUBYFORGE_PROJECT = "delta_attack"
HOMEPATH    = "http://github.com/moro/delta_attack"
BIN_FILES   = %w( delta_attack_server )
VERS        = DeltaAttack::VERSION


# Last committed revision read from .svn/entries, or nil when that file cannot
# be read (the trailing rescue covers a missing file). The capture group has to
# be \d+; a bare "d+" only matches literal "d" characters, so no revision
# number could ever be captured.
REV = File.read(".svn/entries")[/committed-rev="(\d+)"/, 1] rescue nil
CLEAN.include ['**/.*.sw?', '*.gem', '.config']

# Options handed to rdoc both by the gem specification and by the rdoc task.
RDOC_OPTS = [
  '--title', "#{NAME} documentation",
  "--charset", "utf-8",
  "--opname", "index.html",
  "--line-numbers",
  "--main", "README",
  "--inline-source",
]
35
+
36
task :default => [:spec]
task :package => [:clean]

# Runs every example matching spec/**/*_spec.rb.
Spec::Rake::SpecTask.new("spec") do |t|
  t.libs << "spec"
  t.pattern = "spec/**/*_spec.rb"
  t.verbose = true
end

# Gem specification consumed by the packaging tasks.
spec = Gem::Specification.new do |s|
  s.name              = NAME
  s.version           = VERS
  s.platform          = Gem::Platform::RUBY
  s.has_rdoc          = true
  s.extra_rdoc_files  = ["README", "ChangeLog"]
  s.rdoc_options     += RDOC_OPTS + ['--exclude', '^(examples|extras)/']
  s.summary           = DESCRIPTION
  s.description       = DESCRIPTION
  s.author            = AUTHOR
  s.email             = EMAIL
  s.homepage          = HOMEPATH
  s.executables       = BIN_FILES
  # s.rubyforge_project = RUBYFORGE_PROJECT
  s.bindir            = "bin"
  s.require_path      = "lib"
  # Use the same pattern as the spec task above. The previous
  # "spec/*_test.rb" glob matched no file, because the examples are named
  # *_spec.rb and some of them live in sub-directories of spec/.
  s.test_files        = Dir["spec/**/*_spec.rb"]

  #s.add_dependency('activesupport', '>=1.3.1')
  #s.required_ruby_version = '>= 1.8.2'

  s.files = %w(README NOTICE ChangeLog Rakefile) +
    Dir.glob("{bin,doc,spec,lib,templates,generator,extras,website,script}/**/*") +
    #Dir.glob("ext/**/*.{h,c,rb}") +
    #Dir.glob("examples/**/*.rb") +
    Dir.glob("tools/*.rb")

  s.extensions = FileList["ext/**/extconf.rb"].to_a
end
74
+
75
# Defines the package tasks for the gem specification; a tarball is requested
# as well.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec
  pkg.need_tar = true
end

# Prints the gem specification as Ruby source.
task(:debug_gem) { puts spec.to_ruby }

# Runs "rake package" and installs the resulting gem with sudo.
task :install do
  gem_file = "#{NAME}-#{VERS}.gem"
  sh "rake package"
  sh "sudo gem install pkg/#{gem_file}"
end

# Cleans, then removes the installed gem with sudo.
task :uninstall => [:clean] do
  sh "sudo gem uninstall #{NAME}"
end


# Generates HTML documentation into html/. Set DOC_FILES to a comma-separated
# list to document only those files.
Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'html'
  rdoc.options += RDOC_OPTS
  rdoc.template = "resh"
  #rdoc.template = "#{ENV['template']}.rb" if ENV['template']
  requested = ENV['DOC_FILES']
  if requested
    rdoc.rdoc_files.include(requested.split(/,\s*/))
  else
    ['README', 'ChangeLog', 'lib/**/*.rb', 'ext/**/*.c'].each do |pattern|
      rdoc.rdoc_files.include(pattern)
    end
  end
end
108
+ =begin
109
+ desc "Publish to RubyForge"
110
+ task :rubyforge => [:rdoc, :package] do
111
+ require 'rubyforge'
112
+ Rake::RubyForgePublisher.new(RUBYFORGE_PROJECT, 'moro').upload
113
+ end
114
+
115
+ desc 'Package and upload the release to rubyforge.'
116
+ task :release => [:clean, :package] do |t|
117
+ v = ENV["VERSION"] or abort "Must supply VERSION=x.y.z"
118
+ abort "Versions don't match #{v} vs #{VERS}" unless v == VERS
119
+ pkg = "pkg/#{NAME}-#{VERS}"
120
+
121
+ require 'rubyforge'
122
+ rf = RubyForge.new
123
+ puts "Logging in"
124
+ rf.login
125
+
126
+ c = rf.userconfig
127
+ # c["release_notes"] = description if description
128
+ # c["release_changes"] = changes if changes
129
+ c["preformatted"] = true
130
+
131
+ files = [
132
+ "#{pkg}.tgz",
133
+ "#{pkg}.gem"
134
+ ].compact
135
+
136
+ puts "Releasing #{NAME} v. #{VERS}"
137
+ rf.add_release RUBYFORGE_PROJECT, NAME, VERS, *files
138
+ end
139
+ =end
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env jruby
2
+ # vim:set fileencoding=utf-8 filetype=ruby
3
+ $KCODE = 'u'
4
+
5
+ require "optparse"
6
+ require "rbconfig"
7
+ require "delta_attack"
8
+
9
module DeltaAttack
  # Command-line front end that mounts DeltaAttack::Extractor::Servlet on a
  # WEBrick HTTP server.
  class Server

    # Option values used when the matching command-line switch is absent.
    DEFAULT_OPTION = {
      :port  => 3333,
      :mount => "/extract",
    }.freeze

    # Entry point of the executable.
    #
    # When RbConfig reports a Java arch the server runs in this process;
    # otherwise the current script is re-executed with the `jruby` command.
    #
    # argv:: command-line arguments (Array of String); not modified.
    def self.run(argv)
      if RbConfig::CONFIG["arch"] =~ /java/i
        new(argv.dup).run
      else
        exec(*jruby_command(argv))
      end
    end

    # Builds the argument list for re-executing this script under JRuby.
    #
    # argv is splatted so every option becomes its own String argument.
    # Passing the Array itself as a third argument (the previous behaviour)
    # makes Kernel#exec raise TypeError.
    def self.jruby_command(argv)
      ["jruby", $0, *argv]
    end
    private_class_method :jruby_command

    # Stores argv and prepares the option parser. Parsing itself happens in
    # #run.
    def initialize(argv)
      @argv = argv
      @options = DEFAULT_OPTION.dup

      @parser = OptionParser.new do |parser|
        parser.banner = "Usage: #{$0} [options]\n"

        parser.separator "Options:"
        parser.on("-p", "--port=PORT", Integer, "specify port default: #{DEFAULT_OPTION[:port]}") do |v|
          @options[:port] = v
        end
        parser.on("-m", "--mount=PATH", String, "mount path of extract servlet default: #{DEFAULT_OPTION[:mount].dump}") do |v|
          @options[:mount] = v
        end

        parser.separator ""

        parser.on("--version", "Show version string `#{VERSION}'") do
          puts VERSION
          exit
        end
      end
    end

    # Parses the stored arguments, mounts the servlet and serves until INT
    # is received.
    def run
      @parser.order!(@argv)
      # Loaded here rather than at the top of the file, so they are only
      # required on the path that actually starts the server.
      require 'webrick/httpserver'
      require 'delta_attack/extractor'
      require 'delta_attack/extractor/servlet'

      @server = WEBrick::HTTPServer.new(:Port=>@options[:port])
      @server.mount(@options[:mount], DeltaAttack::Extractor::Servlet)
      trap("INT"){ @server.shutdown }
      @server.start
    end
  end
end
64
+
65
+ DeltaAttack::Server.run(ARGV)
66
+
@@ -0,0 +1,57 @@
1
+
2
+ require 'net/http'
3
+ require 'delta_attack/filetype_assumption'
4
+ require 'securerandom'
5
+
6
module DeltaAttack
  # HTTP client that uploads one file to a DeltaAttack server as a
  # multipart/form-data POST and returns the response body.
  class Client
    class << self
      # Uploads +file+ and returns the body of the server's response.
      #
      # file:: path of the document to send
      # host:: server host name (default "localhost")
      # port:: server port (default 3333)
      #
      # Raises RuntimeError when the connection is refused or the response
      # is not 200 OK.
      def cast(file, host="localhost", port=3333)
        req = new(file).request
        res = Net::HTTP.start(host, port){|http| http.request(req) }
        raise "Request failed #{res.code} #{res.message}" unless res.is_a? Net::HTTPOK
        res.body
      rescue Errno::ECONNREFUSED
        raise "DeltaAttack Server is down on http://#{host}:#{port}"
      end
      alias extract cast
    end

    # filename:: path (or just a name) of the document
    # content::  raw bytes; read from +filename+ on demand when nil
    def initialize(filename, content=nil)
      @filename = filename
      @content = content
    end

    # Multipart boundary, generated once per client.
    def boundary
      @boundary ||= SecureRandom.hex(8)
    end

    # Raw bytes of the document, read in binary mode on first use.
    def content
      @content ||= File.open(@filename,"rb"){|f| f.read }
    end

    # MIME type sent for the file part. Falls back to
    # application/octet-stream when the type cannot be derived from the
    # file name, so the part never carries an empty Content-Type header.
    def content_type
      @content_type ||=
        FiletypeAssumption.new(File.basename(@filename)).content_type ||
        "application/octet-stream"
    end

    # Complete multipart body as a byte string.
    def body
      # Every part is treated as raw bytes before joining. Mixing a text
      # header that has non-ASCII characters with binary file content would
      # otherwise raise Encoding::CompatibilityError.
      [
        "--#{boundary}\r\n",
        "Content-Disposition: form-data; name=\"file\"; filename=\"#{quoted_filename}\"\r\n",
        "Content-Type: #{content_type}\r\n\r\n",
        content,
        "\r\n--#{boundary}--\r\n",
      ].map {|part| binary(part) }.join
    end

    # Builds the POST request for +path+ (default "/extract").
    def request(path = "/extract" )
      payload = body
      req = Net::HTTP::Post.new(path)
      req.content_type = "multipart/form-data; boundary=#{boundary}"
      req.body = payload
      # Content-Length counts bytes, not characters.
      req.content_length = payload.bytesize
      req
    end

    private

    # Base name of the file made safe for a quoted header parameter: line
    # breaks are removed and quotes/backslashes are backslash-escaped. Only
    # the base name is sent, so the local directory layout is not disclosed.
    def quoted_filename
      File.basename(@filename.to_s).gsub(/[\r\n]/, "").gsub(/["\\]/) {|c| "\\#{c}" }
    end

    # Copy of +str+ tagged as raw bytes where the runtime has encodings.
    def binary(str)
      copy = str.to_s.dup
      copy.respond_to?(:force_encoding) ? copy.force_encoding("BINARY") : copy
    end
  end
end
57
+
@@ -0,0 +1,23 @@
1
+ require 'java'
2
+
3
module DeltaAttack
  module Extractor
    # Common base of the format-specific extractors. It keeps the document
    # bytes and caches whatever the subclass's extract_data returns.
    class Base
      attr_accessor :bytes

      def initialize(bytes)
        @bytes = bytes
      end

      # Returns the extracted data. extract_data is called when nothing is
      # cached yet, or whenever ignore_cache is true.
      def data(ignore_cache=false)
        @data = extract_data if ignore_cache || !@data
        @data
      end

      private

      # Wraps the stored bytes in a java.io.ByteArrayInputStream.
      def java_input_stream
        Java::JavaIo::ByteArrayInputStream.new(@bytes)
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require 'delta_attack/extractor/base'
2
+
3
+ include_class 'org.apache.poi.hssf.usermodel.HSSFWorkbook'
4
+ include_class 'org.apache.poi.hssf.usermodel.HSSFCell'
5
+
6
module DeltaAttack
  module Extractor
    # Extractor for Excel workbooks, built on POI's HSSFWorkbook.
    class Excel < Base
      private

      # Returns one entry per sheet; each entry is the result of
      # extract_sheet for that sheet. The input stream is always closed.
      def extract_data
        stream = java_input_stream
        begin
          workbook = HSSFWorkbook.new(stream)
          sheet_indexes = 0...workbook.number_of_sheets
          return sheet_indexes.map {|index| extract_sheet(workbook.sheet_at(index)) }
        ensure
          stream.close
        end
      end

      # Maps every row of the sheet to the list of its cell values.
      def extract_sheet(sheet)
        sheet.iterator.map do |row|
          row.iterator.map do |cell|
            handle_cell(cell)
          end
        end
      end

      # Converts one cell to a Ruby value: the number for numeric cells, the
      # text for string cells, nil for boolean and blank cells (and for any
      # cell type not listed).
      def handle_cell(cell)
        case cell.cell_type
        when HSSFCell::CELL_TYPE_NUMERIC then cell.numeric_cell_value
        when HSSFCell::CELL_TYPE_STRING  then cell.rich_string_cell_value.string
        when HSSFCell::CELL_TYPE_BOOLEAN, HSSFCell::CELL_TYPE_BLANK then nil
        end
      end
    end
  end
end
41
+
@@ -0,0 +1,20 @@
1
+ require 'delta_attack/extractor/base'
2
+
3
+ include_class 'org.apache.poi.hslf.usermodel.SlideShow'
4
+
5
module DeltaAttack
  module Extractor
    # Extractor for PowerPoint presentations, built on POI's SlideShow.
    class PowerPoint < Base
      private

      # Returns one Array per slide holding the text of each of its text
      # runs.
      #
      # The input stream is closed in an ensure clause, as the Word and
      # Excel extractors do; previously it was left open.
      def extract_data
        input_stream = java_input_stream
        begin
          slide_show = SlideShow.new(input_stream)
          slide_show.slides.map do |slide|
            slide.text_runs.map{|tr| tr.text }
          end
        ensure
          input_stream.close
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'webrick/httpservlet'
2
+ require 'delta_attack/extractor'
3
+ require 'delta_attack/filetype_assumption'
4
+
5
module DeltaAttack
  module Extractor
    # WEBrick servlet for the extractor: GET serves an upload form, POST
    # returns the extracted text of the uploaded "file" field as text/plain.
    class Servlet < WEBrick::HTTPServlet::AbstractServlet
      # Serves a minimal HTML upload form.
      def do_GET(req, res)
        # Post back to the path this servlet is mounted on. The mount point
        # is configurable, so a hard-coded "/extract" breaks the form for
        # any other mount path.
        action = WEBrick::HTMLUtils.escape(req.script_name.to_s)
        res.content_type = "text/html"
        res.body = <<-HTML
          <html>
            <head></head>
            <body>
              <form action="#{action}" enctype="multipart/form-data" method="post">
                <input type="file" name="file" />
                <input type="submit" name="submit" value="up" />
              </form>
            </body>
          </html>
        HTML
      end

      # Extracts the text of the uploaded document.
      #
      # Raises WEBrick::HTTPStatus::BadRequest when the request has no "file"
      # field, instead of failing with NoMethodError on nil.
      def do_POST(req, res)
        f = req.query["file"]
        raise WEBrick::HTTPStatus::BadRequest, 'missing "file" form field' if f.nil?

        # filename is nil for a field that is not a file upload; pass an
        # empty name so type detection falls through to :unknown.
        type = FiletypeAssumption.new(f.filename.to_s, f['content-type'])
        res.body = Extractor.extract(f.to_s, type.filetype)
        res.content_type = "text/plain"
      end
    end
  end
end
31
+
@@ -0,0 +1,25 @@
1
+ require 'delta_attack/extractor/base'
2
+
3
+ include_class 'org.apache.poi.hwpf.HWPFDocument'
4
+
5
module DeltaAttack
  module Extractor
    # Extractor for Word documents, built on POI's HWPFDocument.
    class Word < Base

      private

      # Returns the stripped text of every paragraph in the document's
      # range. The input stream is always closed.
      def extract_data
        stream = java_input_stream
        begin
          document = HWPFDocument.new(stream)
          range = document.range
          paragraph_indexes = 0...range.num_paragraphs
          paragraph_indexes.map {|index| range.paragraph(index).text.strip }
        ensure
          stream.close
        end
      end
    end
  end
end
25
+
@@ -0,0 +1,20 @@
1
+ require 'delta_attack/extractor/base'
2
+ require 'delta_attack/extractor/word'
3
+ require 'delta_attack/extractor/excel'
4
+ require 'delta_attack/extractor/power_point'
5
+
6
module DeltaAttack
  module Extractor
    # Extracts plain text from +content+.
    #
    # content:: document bytes as a String (converted with to_java_bytes)
    # type::    :word, :excel or :power_point
    #
    # Returns the extracted values flattened and joined with newlines, or
    # the string "not supported" for any other type.
    def extract(content,type)
      klass =
        if    :word == type        then Word
        elsif :excel == type       then Excel
        elsif :power_point == type then PowerPoint
        end
      return "not supported" unless klass

      values = klass.new(content.to_java_bytes).data
      values.flatten.join("\n")
    end
    module_function :extract
  end
end
@@ -0,0 +1,46 @@
1
+ begin
2
+ require 'mahoro'
3
+ rescue LoadError
4
+ nil
5
+ end
6
+
7
module DeltaAttack
  # Guesses which kind of Office document a file is, from its declared
  # content type first and its file extension second.
  class FiletypeAssumption
    # MIME type => file type symbol.
    CONTENT_TYPES = {
      "application/msword"            => :word,
      "application/vnd.ms-excel"      => :excel,
      "application/vnd.ms-powerpoint" => :power_point,
    }.freeze

    # true when the optional Mahoro constant is defined, false otherwise.
    def self.support_magic?
      defined?(Mahoro) ? true : false
    end

    # filename::     file name; only its extension is used
    # content_type:: declared MIME type, may be nil
    # content::      raw bytes; stored, but not consulted by this class
    def initialize(filename, content_type = nil, content = nil)
      @filename = filename
      @content_type = content_type
      @content = content
    end

    # Returns :word, :excel, :power_point, or :unknown when neither the
    # content type nor the extension is recognised.
    def filetype
      by_content_type || by_extention || :unknown
    end

    # Returns the MIME type matching #filetype, or nil for :unknown.
    def content_type
      # Hash#key is the replacement for the deprecated Hash#index.
      CONTENT_TYPES.key(filetype)
    end

    private

    # Looks the declared type up after dropping parameters such as
    # "; charset=..." and normalising case.
    def by_content_type
      media_type = @content_type.to_s.split(";").first.to_s.strip.downcase
      CONTENT_TYPES[media_type]
    end

    # Maps the extension to a file type; a nil file name yields nil rather
    # than raising.
    def by_extention
      case File.extname(@filename.to_s).downcase
      when ".doc" then :word
      when ".xls" then :excel
      when ".ppt" then :power_point
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+
2
module DeltaAttack
  # Version string of this library.
  VERSION = '0.1.0'
end
5
+
data/lib/vendor/README ADDED
@@ -0,0 +1,8 @@
1
+ Download POI from <http://poi.apache.org/> version =< 3.1
2
+
3
+ and symlink it here as poi-current.jar
4
+
5
+ example,
6
+
7
+ $ ls -l lib/vendor/
8
+ lrwxr-xr-x 1 you us 40 Sep 12 09:54 poi-current.jar -> poi-3.1-FINAL/poi-3.1-FINAL-20080629.jar
@@ -0,0 +1,23 @@
1
+ require File.expand_path("../spec_helper", File.dirname(__FILE__))
2
+ require 'delta_attack/extractor/excel'
3
+ require 'java'
4
+ require 'timeout'
5
+
6
describe DeltaAttack::Extractor::Excel do
  include SpecHelper

  before do
    # The workbook is binary data, so read it in "rb" mode.
    content = File.open(sample_data("13TOKYO.xls"), "rb") {|f| f.read }
    @xls = DeltaAttack::Extractor::Excel.new(content.to_java_bytes)
  end

  it { @xls.bytes.should_not be_nil }

  it "data[0][0][0].should == 13101" do
    @xls.data[0][0][0].should == 13101
  end

  it "2nd call of data() should be cached" do
    first = @xls.data # 1st.
    # Timeout.timeout is the module-level form of the bare timeout call.
    lambda{ Timeout.timeout(0.1){ @xls.data } }.should_not raise_error(Timeout::Error)
    # A cached call hands back the very same object.
    @xls.data.should equal(first)
  end
end
23
+
@@ -0,0 +1,23 @@
1
+ require File.expand_path("../spec_helper", File.dirname(__FILE__))
2
+ require 'delta_attack/extractor/power_point'
3
+ require 'java'
4
+ require 'timeout'
5
+
6
describe DeltaAttack::Extractor::PowerPoint do
  include SpecHelper

  before do
    # The presentation is binary data, so read it in "rb" mode.
    content = File.open(sample_data("named_scope06.ppt"), "rb") {|f| f.read }
    @ppt = DeltaAttack::Extractor::PowerPoint.new(content.to_java_bytes)
  end

  it { @ppt.bytes.should_not be_nil }

  it "data.flatten.first.should =~ /named_scope/" do
    @ppt.data.flatten.first.should =~ /named_scope/
  end

  it "2nd call of data() should be cached" do
    first = @ppt.data # 1st.
    # Timeout.timeout is the module-level form of the bare timeout call.
    lambda{ Timeout.timeout(0.1){ @ppt.data } }.should_not raise_error(Timeout::Error)
    # A cached call hands back the very same object.
    @ppt.data.should equal(first)
  end
end
23
+
@@ -0,0 +1,24 @@
1
+ require File.expand_path("../spec_helper", File.dirname(__FILE__))
2
+ require 'delta_attack/extractor/word'
3
+ require 'java'
4
+ require 'timeout'
5
+ $KCODE = "u"
6
+
7
describe DeltaAttack::Extractor::Word do
  include SpecHelper

  before do
    # The document is binary data, so read it in "rb" mode.
    content = File.open(sample_data("myblog.doc"), "rb") {|f| f.read }
    @doc = DeltaAttack::Extractor::Word.new(content.to_java_bytes)
  end

  it { @doc.bytes.should_not be_nil }

  it "data.flatten.first.should =~ /WEBrick/" do
    @doc.data.flatten.first.should =~ /WEBrick/
  end

  it "2nd call of data() should be cached" do
    first = @doc.data # 1st.
    # Timeout.timeout is the module-level form of the bare timeout call.
    lambda{ Timeout.timeout(0.1){ @doc.data } }.should_not raise_error(Timeout::Error)
    # A cached call hands back the very same object.
    @doc.data.should equal(first)
  end
end
24
+
@@ -0,0 +1,51 @@
1
+ require File.expand_path("spec_helper", File.dirname(__FILE__))
2
+ require 'delta_attack/filetype_assumption'
3
+
4
describe DeltaAttack::FiletypeAssumption do
  include SpecHelper

  # mahoro is an optional dependency. Check that support_magic? agrees with
  # whether it is loaded, rather than requiring it to be absent (which fails
  # on any machine that has mahoro installed).
  it "support_magic? should tell whether mahoro is loaded" do
    supported = DeltaAttack::FiletypeAssumption.support_magic? ? true : false
    supported.should == (defined?(Mahoro) ? true : false)
  end

  describe "new('hoge.xls')" do
    before do
      @asm = DeltaAttack::FiletypeAssumption.new('hoge.xls')
    end

    it "filetype.should == :excel" do
      @asm.filetype.should == :excel
    end
  end

  describe "new('hoge.dat', 'application/vnd.ms-excel')" do
    before do
      @asm = DeltaAttack::FiletypeAssumption.new('hoge.dat', 'application/vnd.ms-excel')
    end

    it "filetype.should == :excel" do
      @asm.filetype.should == :excel
    end
  end

  describe "new('hoge.dat', 'application/octet-stream')" do
    before do
      @asm = DeltaAttack::FiletypeAssumption.new('hoge.dat', 'application/octet-stream')
    end

    it "filetype.should == :unknown" do
      @asm.filetype.should == :unknown
    end
  end

  describe "new('hoge.dat', 'application/octet-stream', <content>)" do
    before do
      # The workbook is binary data, so read it in "rb" mode.
      content = File.open(sample_data("13TOKYO.xls"), "rb") {|f| f.read }
      @asm = DeltaAttack::FiletypeAssumption.new('hoge.dat', 'application/octet-stream', content)
    end

    it "filetype.should == :excel" do
      pending "mahoro is not installed" unless DeltaAttack::FiletypeAssumption.support_magic?
      @asm.filetype.should == :excel
    end
  end
end
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env ruby
2
+ # vim:set fileencoding=utf-8 filetype=ruby
3
+ $KCODE = 'u'
4
+
5
+ require 'rubygems'
6
+ $:.unshift(File.expand_path("../lib", File.dirname(__FILE__)))
7
+
8
module SpecHelper
  # Absolute path of the sample file +name+ under samples/data, which sits
  # next to the directory containing this helper.
  def sample_data(name)
    relative = File.join("..", "samples", "data", name)
    File.expand_path(relative, File.dirname(__FILE__))
  end
end
13
+
metadata ADDED
@@ -0,0 +1,88 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: moro-delta_attack
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - MOROHASHI Kyosuke
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-09-22 00:00:00 -07:00
13
+ default_executable: delta_attack_server
14
+ dependencies: []
15
+
16
+ description: extract text from MS Office document with Apache POI
17
+ email: moronatural@gmail.com
18
+ executables:
19
+ - delta_attack_server
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README
24
+ - ChangeLog
25
+ files:
26
+ - README
27
+ - NOTICE
28
+ - ChangeLog
29
+ - Rakefile
30
+ - bin/delta_attack_server
31
+ - spec/extractor
32
+ - spec/extractor/excel_spec.rb
33
+ - spec/extractor/power_point_spec.rb
34
+ - spec/extractor/word_spec.rb
35
+ - spec/filetype_assumption_spec.rb
36
+ - spec/spec_helper.rb
37
+ - lib/delta_attack
38
+ - lib/delta_attack/client.rb
39
+ - lib/delta_attack/extractor
40
+ - lib/delta_attack/extractor/base.rb
41
+ - lib/delta_attack/extractor/excel.rb
42
+ - lib/delta_attack/extractor/power_point.rb
43
+ - lib/delta_attack/extractor/servlet.rb
44
+ - lib/delta_attack/extractor/word.rb
45
+ - lib/delta_attack/extractor.rb
46
+ - lib/delta_attack/filetype_assumption.rb
47
+ - lib/delta_attack.rb
48
+ - lib/vendor
49
+ - lib/vendor/README
50
+ has_rdoc: true
51
+ homepage: http://github.com/moro/delta_attack
52
+ post_install_message:
53
+ rdoc_options:
54
+ - --title
55
+ - delta_attack documentation
56
+ - --charset
57
+ - utf-8
58
+ - --opname
59
+ - index.html
60
+ - --line-numbers
61
+ - --main
62
+ - README
63
+ - --inline-source
64
+ - --exclude
65
+ - ^(examples|extras)/
66
+ require_paths:
67
+ - lib
68
+ required_ruby_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: "0"
73
+ version:
74
+ required_rubygems_version: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: "0"
79
+ version:
80
+ requirements: []
81
+
82
+ rubyforge_project:
83
+ rubygems_version: 1.2.0
84
+ signing_key:
85
+ specification_version: 2
86
+ summary: extract text from MS Office document with Apache POI
87
+ test_files: []
88
+