docx 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE.md +21 -0
- data/README.md +32 -0
- data/lib/docx.rb +5 -0
- data/lib/docx/containers.rb +2 -0
- data/lib/docx/containers/paragraph.rb +23 -0
- data/lib/docx/containers/text_run.rb +35 -0
- data/lib/docx/document.rb +27 -0
- data/lib/docx/parser.rb +49 -0
- data/lib/docx/version.rb +3 -0
- metadata +75 -0
data/LICENSE.md
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) Marcus Ortiz, http://marcusortiz.com
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# docx
|
2
|
+
|
3
|
+
a ruby library/gem for interacting with `.docx` files
|
4
|
+
|
5
|
+
## usage
|
6
|
+
|
7
|
+
### basic
|
8
|
+
|
9
|
+
``` ruby
|
10
|
+
require 'docx'
|
11
|
+
|
12
|
+
d = Docx::Document.open('example.docx')
|
13
|
+
d.each_paragraph do |p|
|
14
|
+
puts p
|
15
|
+
end
|
16
|
+
```
|
17
|
+
|
18
|
+
### advanced
|
19
|
+
|
20
|
+
``` ruby
|
21
|
+
require 'docx'
|
22
|
+
|
23
|
+
d = Docx::Document.open('example.docx')
|
24
|
+
d.each_paragraph do |p|
|
25
|
+
p.each_text_run do |run|
|
26
|
+
run.italicized?
|
27
|
+
run.bolded?
|
28
|
+
run.underlined?
|
29
|
+
run.formatting
|
30
|
+
run.text
|
31
|
+
end
|
32
|
+
end
```
|
data/lib/docx.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'docx/containers/text_run'
|
2
|
+
|
3
|
+
module Docx
  module Containers
    # A paragraph: an ordered list of text runs.
    #
    # Each element of +text_runs+ only needs to respond to +text+.
    class Paragraph
      attr_accessor :text_runs

      # @param txt_runs [Array] the runs that make up this paragraph, in order
      def initialize(txt_runs)
        @text_runs = txt_runs
      end

      # @return [String] the text of every run concatenated in order
      def to_s
        @text_runs.map(&:text).join('')
      end

      # Yields each text run in order.
      #
      # Without a block an Enumerator is returned instead of raising
      # LocalJumpError, mirroring the convention of Ruby's own #each methods.
      #
      # @yieldparam tr the current text run
      # @return [Array, Enumerator] the run list when a block is given,
      #   otherwise an Enumerator over the runs
      def each_text_run
        return enum_for(:each_text_run) unless block_given?

        @text_runs.each { |tr| yield(tr) }
      end

      alias_method :text, :to_s
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Docx
  module Containers
    # A run of text together with its formatting flags.
    class TextRun
      # Formatting applied when a flag is not supplied. Frozen so that no
      # instance can alter the defaults seen by every other instance.
      DEFAULT_FORMATTING = {
        italic:    false,
        bold:      false,
        underline: false
      }.freeze

      attr_reader :text
      attr_reader :formatting

      # @param attrs [Hash] optional +:text+ (String) and +:formatting+
      #   (Hash with any of +:italic+, +:bold+, +:underline+)
      def initialize(attrs = {})
        @text = attrs[:text] || ''
        # Merge into a fresh hash: each run owns its formatting, and flags
        # missing from a partial hash fall back to the defaults.
        @formatting = DEFAULT_FORMATTING.merge(attrs[:formatting] || {})
      end

      # @return [String] the run's text
      def to_s
        @text
      end

      # @return the value stored under +:italic+ in the formatting hash
      def italicized?
        @formatting[:italic]
      end

      # @return the value stored under +:bold+ in the formatting hash
      def bolded?
        @formatting[:bold]
      end

      # @return the value stored under +:underline+ in the formatting hash
      def underlined?
        @formatting[:underline]
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'docx/parser'
|
2
|
+
|
3
|
+
module Docx
  # A document loaded through Docx::Parser, exposed as a list of paragraphs.
  class Document
    attr_reader :paragraphs

    # Parses the file at +path+ and stores its paragraphs.
    #
    # @param path [String] path to the file; expanded with File.expand_path
    def initialize(path)
      Parser.new(File.expand_path(path)) do |parser|
        @paragraphs = parser.paragraphs
      end
    end

    # Convenience constructor, equivalent to +new+.
    #
    # @param path [String] path to the file
    # @return [Document]
    def self.open(path)
      new(path)
    end

    # Yields each paragraph in order.
    #
    # Without a block an Enumerator is returned instead of raising
    # LocalJumpError, mirroring the convention of Ruby's own #each methods.
    #
    # @yieldparam paragraph the current paragraph
    # @return [Array, Enumerator] the paragraph list when a block is given,
    #   otherwise an Enumerator over the paragraphs
    def each_paragraph
      return enum_for(:each_paragraph) unless block_given?

      @paragraphs.each { |paragraph| yield(paragraph) }
    end

    # @return [String] every paragraph's +to_s+ joined with newlines
    def to_s
      @paragraphs.map(&:to_s).join("\n")
    end

    alias_method :text, :to_s
  end
end
|
data/lib/docx/parser.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'docx/containers'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'zip/zip'
|
4
|
+
|
5
|
+
module Docx
  # Reads 'word/document.xml' out of a .docx archive and converts its
  # paragraph (w:p) and run (w:r) elements into container objects.
  class Parser
    # w:val values that explicitly switch a toggle property (w:b, w:i) off.
    OFF_VALUES = %w[false 0 off].freeze

    # Opens the archive at +path+ and loads its main document XML.
    #
    # With a block, yields the parser and closes the archive afterwards,
    # even if the block raises. Without a block the archive is left open,
    # as before. If loading the XML fails the archive is closed in both cases.
    #
    # @param path [String] path to the .docx file
    # @yieldparam parser [Parser] this parser
    def initialize(path)
      @zip = Zip::ZipFile.open(path)
      keep_open = false
      begin
        @xml = Nokogiri::XML(@zip.find_entry('word/document.xml').get_input_stream)
        if block_given?
          yield self
        else
          keep_open = true
        end
      ensure
        @zip.close unless keep_open
      end
    end

    # @return [Array<Containers::Paragraph>] one entry per w:p element found
    #   under w:document//w:body
    def paragraphs
      @xml.xpath('//w:document//w:body//w:p').map { |p_node| parse_paragraph_from p_node }
    end

    private

    # Builds a Paragraph from a w:p node.
    def parse_paragraph_from(p_node)
      Containers::Paragraph.new(parse_runs_from(p_node))
    end

    # Builds one TextRun per direct w:r child of the paragraph node.
    def parse_runs_from(p_node)
      p_node.xpath('w:r').map do |r_node|
        rpr_node = r_node.xpath('w:rPr')
        Containers::TextRun.new({
          text:       parse_text_from(r_node),
          formatting: parse_formatting_from(rpr_node)
        })
      end
    end

    # Concatenates the text of every w:t child of the run node.
    def parse_text_from(r_node)
      r_node.xpath('w:t').map(&:text).join('')
    end

    # Derives the formatting flags from the run-properties node set.
    #
    # Presence of an element alone is not sufficient: WordprocessingML lets a
    # property be present but switched off via w:val, so the value is checked.
    def parse_formatting_from(rpr_node)
      {
        italic:    toggle_on?(rpr_node.xpath('w:i').first),
        bold:      toggle_on?(rpr_node.xpath('w:b').first),
        underline: underline_on?(rpr_node.xpath('w:u').first)
      }
    end

    # True when the toggle element exists and its w:val is absent or not an
    # explicit "off" value.
    def toggle_on?(node)
      return false if node.nil?

      val = val_of(node)
      val.nil? || !OFF_VALUES.include?(val)
    end

    # True when a w:u element exists and its w:val is not "none".
    def underline_on?(node)
      return false if node.nil?

      val_of(node) != 'none'
    end

    # Returns the node's w:val attribute value, or nil when it has none.
    def val_of(node)
      attr = node.at_xpath('@w:val')
      attr && attr.value
    end
  end
end
|
data/lib/docx/version.rb
ADDED
metadata
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: docx
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Marcus Ortiz
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-04-12 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: &70332799492380 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '1.5'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70332799492380
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rubyzip
|
27
|
+
requirement: &70332799491720 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ~>
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0.9'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70332799491720
|
36
|
+
description: a ruby library/gem for interacting with .docx files
|
37
|
+
email: mportiz08@gmail.com
|
38
|
+
executables: []
|
39
|
+
extensions: []
|
40
|
+
extra_rdoc_files: []
|
41
|
+
files:
|
42
|
+
- README.md
|
43
|
+
- LICENSE.md
|
44
|
+
- lib/docx/containers/paragraph.rb
|
45
|
+
- lib/docx/containers/text_run.rb
|
46
|
+
- lib/docx/containers.rb
|
47
|
+
- lib/docx/document.rb
|
48
|
+
- lib/docx/parser.rb
|
49
|
+
- lib/docx/version.rb
|
50
|
+
- lib/docx.rb
|
51
|
+
homepage: https://github.com/mportiz08/docx
|
52
|
+
licenses: []
|
53
|
+
post_install_message:
|
54
|
+
rdoc_options: []
|
55
|
+
require_paths:
|
56
|
+
- lib
|
57
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ! '>='
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
64
|
+
none: false
|
65
|
+
requirements:
|
66
|
+
- - ! '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
requirements: []
|
70
|
+
rubyforge_project:
|
71
|
+
rubygems_version: 1.8.11
|
72
|
+
signing_key:
|
73
|
+
specification_version: 3
|
74
|
+
summary: a ruby library/gem for interacting with .docx files
|
75
|
+
test_files: []
|