docx2txt 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7644ad54195f4d768b27e39e24adc29cf07f6864
4
+ data.tar.gz: 7b274d12dba47626413924f8c854cc3e572cf21b
5
+ SHA512:
6
+ metadata.gz: b9cdc6b7ae36641220d68c03f4edee66825c236da676f3b17ea9c83a8cc5bb0fcedff422acc4b890a35584dbb54988444d35c62abf15b313dc2867f540dbbe42
7
+ data.tar.gz: 5fd6717ebc51e50fd0538f869706baa40002aec765bac3e9b0e2b1d3b074aec4fca621dd4ff8b7abb2854589a97f9aa34ed6d100a57d3dd4e414d26de77bb72d
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
@@ -0,0 +1 @@
1
+ ruby-2.0.0-p353
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
4
+
@@ -0,0 +1,20 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ Docx2TXT (0.0.1)
5
+ nokogiri
6
+ rubyzip
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ mini_portile (0.5.2)
12
+ nokogiri (1.6.1)
13
+ mini_portile (~> 0.5.0)
14
+ rubyzip (1.1.0)
15
+
16
+ PLATFORMS
17
+ ruby
18
+
19
+ DEPENDENCIES
20
+ Docx2TXT!
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 Thiago R. Colucci
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,32 @@
1
+ # Docx2TXT
2
+
3
+ Extract the simplest TXT (I could imagine) from an MS Docx. It just makes a best effort to preserve paragraphs.
4
+
5
+ ## How
6
+
7
+ Instantiate the docx with the location of the file
8
+
9
+ doc = Docx2TXT::Docx.new file_path
10
+
11
+ Later just ask for the txt
12
+
13
+ doc.to_txt
14
+
15
+ ## Simple executable
16
+
17
+ docx2txt <docxfilepath>
18
+
19
+ <!-- ## Code status
20
+
21
+ [![Build Status](https://travis-ci.org/thoughtworks/pacto.png)](https://travis-ci.org/thoughtworks/pacto)
22
+ [![Code Climate](https://codeclimate.com/github/thoughtworks/pacto.png)](https://codeclimate.com/github/thoughtworks/pacto)
23
+ [![Dependency Status](https://gemnasium.com/thoughtworks/pacto.png)](https://gemnasium.com/thoughtworks/pacto)
24
+ [![Coverage Status](https://coveralls.io/repos/thoughtworks/pacto/badge.png)](https://coveralls.io/r/thoughtworks/pacto) -->
25
+
26
+ ## Contributing
27
+
28
+ 1. Fork it
29
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
30
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
31
+ 4. Push to the branch (`git push origin my-new-feature`)
32
+ 5. Create new Pull Request
@@ -0,0 +1,8 @@
1
+ #! /usr/bin/env ruby
2
+ require 'docx2txt'
3
+
4
# Command-line entry point: converts the file named by the first argument and
# prints its text, or prints a usage line when no argument was given.
docx_path = ARGV.first

if docx_path
  puts Docx2TXT::Docx.new(docx_path).to_txt
else
  puts "Usage:\n\tdocx2txt <path-to-MS-Docx-file>"
end
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'docx2txt/version'
5
+
6
# Gem specification for docx2txt. The packaged file list is taken from the
# files tracked by git, and every tracked file under bin/ becomes an executable.
Gem::Specification.new do |spec|
  spec.name        = "docx2txt"
  spec.version     = Docx2TXT::VERSION
  spec.summary     = %q{Extract the contents of MS Docx to TXT}
  spec.description = %q{Extract the simplest TXT I could imagine from a Docx. It just do a best effort to preserve paragraphs.}
  spec.homepage    = 'https://github.com/ticolucci/docx2txt'
  spec.license     = 'MIT'
  spec.authors     = ["Thiago Colucci"]
  spec.email       = ["ticolucci@gmail.com"]

  spec.require_paths = ["lib"]
  spec.files         = `git ls-files`.split($/)
  # Executable names are the basenames of the packaged files that live in bin/.
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  # Test files are intentionally not declared:
  #spec.test_files   = spec.files.grep(%r{^(test|spec|features)/})

  # Runtime dependencies (no version constraints).
  spec.add_dependency "nokogiri"
  spec.add_dependency "rubyzip"
end
@@ -0,0 +1,32 @@
1
+ require "rubygems"
2
+ require "nokogiri"
3
+ require "zip"
4
+
5
# Namespace for the docx2txt gem.
module Docx2TXT
  # Extracts plain text from a .docx archive stored on disk.
  class Docx
    # Archive path of the XML part that holds the document content.
    DOCUMENT_ENTRY = "word/document.xml".freeze

    # @param location [String] path of the .docx file to read
    def initialize(location)
      @location = location
    end

    # Reads the archive and returns its text.
    #
    # @return [String] the extracted text, one line per node found three
    #   levels below the parsed XML document
    # @raise [ArgumentError] if the archive contains no word/document.xml entry
    def to_txt
      get_text(get_xml)
    end

    private

    # Parses the main document part of the archive.
    #
    # The archive is opened with the block form so it is closed again even when
    # reading fails, and the entry is looked up by its exact name rather than
    # by a loose pattern match.
    #
    # @return [Nokogiri::XML::Document]
    # @raise [ArgumentError] if the entry is missing
    def get_xml
      raw_xml = Zip::File.open(@location) do |archive|
        entry = archive.find_entry(DOCUMENT_ENTRY)
        raise ArgumentError, "#{@location} has no #{DOCUMENT_ENTRY} entry" unless entry

        entry.get_input_stream(&:read)
      end
      Nokogiri.XML(raw_xml)
    end

    # Joins the text of every node three levels below +xml+ with newlines.
    # NOTE(review): presumably those levels are document -> body -> paragraph
    # in a typical .docx; confirm against real files.
    #
    # @param xml [#children] parsed XML document
    # @return [String]
    def get_text(xml)
      lines = xml.children.flat_map do |root|
        root.children.flat_map do |section|
          section.children.map(&:text)
        end
      end
      lines.join("\n")
    end
  end
end
@@ -0,0 +1,3 @@
1
module Docx2TXT
  # Version of the gem; frozen so the constant cannot be mutated in place.
  VERSION = '0.0.2'.freeze
end
metadata ADDED
@@ -0,0 +1,84 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: docx2txt
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Thiago Colucci
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-12-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rubyzip
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: Extract the simplest TXT I could imagine from a Docx. It just do a best
42
+ effort to preserve paragraphs.
43
+ email:
44
+ - ticolucci@gmail.com
45
+ executables:
46
+ - docx2txt
47
+ extensions: []
48
+ extra_rdoc_files: []
49
+ files:
50
+ - .gitignore
51
+ - .ruby-version
52
+ - Gemfile
53
+ - Gemfile.lock
54
+ - LICENSE
55
+ - README.md
56
+ - bin/docx2txt
57
+ - docx2txt.gemspec
58
+ - lib/docx2txt.rb
59
+ - lib/docx2txt/version.rb
60
+ homepage: https://github.com/ticolucci/docx2txt
61
+ licenses:
62
+ - MIT
63
+ metadata: {}
64
+ post_install_message:
65
+ rdoc_options: []
66
+ require_paths:
67
+ - lib
68
+ required_ruby_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - '>='
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ requirements: []
79
+ rubyforge_project:
80
+ rubygems_version: 2.1.11
81
+ signing_key:
82
+ specification_version: 4
83
+ summary: Extract the contents of MS Docx to TXT
84
+ test_files: []