docx_tools 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f5a2434c4452cf3eb2b6602513bbac7da765114d
4
+ data.tar.gz: 3e5d338708722a05adb60937b6ebc161dce2a2e9
5
+ SHA512:
6
+ metadata.gz: 1864672d009f8bc460c2730aabb4f9fa5818957b04c80d020cf8b0fd6e428a4272e1c1f9ef70165b93ca4a96a1d260606a31a8f897144d2a3a50a05e95cfb001
7
+ data.tar.gz: 7b904651582f0ac616d291ce0b751478fee02945c0228d0a47c481cb5da99972574df4c407b803528037aac31e522eb34129d60bf80b4a65283f14e511c470de
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ /.bundle/
2
+ /.yardoc/
3
+ /coverage/
4
+ /doc/
5
+
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,35 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ docx_tools (0.0.1)
5
+ nokogiri (~> 1.6, >= 1.6.6)
6
+ rubyzip (~> 1.1, >= 1.1.7)
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ docile (1.1.5)
12
+ json (1.8.3)
13
+ mini_portile (0.6.2)
14
+ minitest (5.8.1)
15
+ nokogiri (1.6.6.2)
16
+ mini_portile (~> 0.6.0)
17
+ rubyzip (1.1.7)
18
+ simplecov (0.10.0)
19
+ docile (~> 1.1.0)
20
+ json (~> 1.8)
21
+ simplecov-html (~> 0.10.0)
22
+ simplecov-html (0.10.0)
23
+ yard (0.8.7.6)
24
+
25
+ PLATFORMS
26
+ ruby
27
+
28
+ DEPENDENCIES
29
+ docx_tools!
30
+ minitest (~> 5.8)
31
+ simplecov (~> 0.10)
32
+ yard (~> 0.8)
33
+
34
+ BUNDLED WITH
35
+ 1.10.6
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright 2015 DrugDev, Inc.
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,3 @@
1
+ # DocxTools
2
+
3
+ This project rocks and uses MIT-LICENSE.
data/Rakefile ADDED
@@ -0,0 +1,25 @@
1
# Rake tasks for the docx_tools gem: `rdoc` builds the API docs and `test`
# (the default task) runs the test suite.

begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'
require 'rake/testtask'

# Generates RDoc output into ./rdoc from the README and everything under lib/.
RDoc::Task.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'DocxTools'
  rdoc.options << '--line-numbers'
  # The package ships README.md (there is no README.rdoc), so include the
  # file that actually exists; a non-matching name is silently skipped.
  rdoc.rdoc_files.include('README.md')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# Runs every test/**/*_test.rb file with lib/ and test/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.libs << 'test'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = false
end

task default: :test
data/bin/console ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'docx_tools'
5
+
6
+ require 'irb'
7
+ IRB.start
@@ -0,0 +1,25 @@
1
+ lib = File.expand_path('../lib', __FILE__)
2
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
+ require 'docx_tools/version'
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = 'docx_tools'
7
+ spec.version = DocxTools::VERSION
8
+ spec.authors = ['Kevin Deisz']
9
+ spec.email = ['info@trialnetworks.com']
10
+ spec.homepage = 'https://github.com/drugdev/docx_tools'
11
+
12
+ spec.summary = 'Tools for manipulating docx files'
13
+ spec.description = 'An API for managing merge fields within docx files'
14
+ spec.license = 'MIT'
15
+
16
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test)/}) }
17
+ spec.require_paths = ['lib']
18
+
19
+ spec.add_runtime_dependency 'nokogiri', '~> 1.6', '>= 1.6.6'
20
+ spec.add_runtime_dependency 'rubyzip', '~> 1.1', '>= 1.1.7'
21
+
22
+ spec.add_development_dependency 'minitest', '~> 5.8'
23
+ spec.add_development_dependency 'simplecov', '~> 0.10'
24
+ spec.add_development_dependency 'yard', '~> 0.8'
25
+ end
@@ -0,0 +1,22 @@
1
module DocxTools
  # Wraps a docx file opened as a zip archive and exposes its entries and
  # the parsed XML of individual files inside it.
  class Document
    # the zip archive backing this document
    attr_accessor :zip

    # open the file at +filepath+ as a zip archive and keep it in #zip
    def initialize(filepath)
      archive = Zip::File.open(filepath)
      self.zip = archive
    end

    # list of entries reported by the underlying zip archive
    def entries
      archive = zip
      archive.entries
    end

    # read +filename+ out of the archive and hand its contents to Nokogiri::XML
    def get(filename)
      raw_xml = zip.read(filename)
      Nokogiri::XML(raw_xml)
    end
  end
end
@@ -0,0 +1,111 @@
1
module DocxTools
  # Finds the mail-merge fields in the main document, header and footer parts
  # of a docx file, replaces them with caller-supplied text, and writes the
  # result out as a new docx (zip) file.
  class MailMerge
    # Matches a field instruction of the form ` MERGEFIELD name ` where the
    # name may be double-quoted and may be followed by `\* MERGEFORMAT `.
    # Case-insensitive; capture group 1 is the field name.
    REGEXP = / MERGEFIELD "?([^ ]+?)"? (| \\\* MERGEFORMAT )/i.freeze

    # document:  the DocxTools::Document being merged
    # part_list: the DocxTools::PartList of parsed parts that are edited in place
    attr_accessor :document, :part_list

    # Open the docx, load its main/header/footer parts and rewrite every
    # merge field in them into a simple <MergeField> placeholder element.
    #
    # file_object is passed straight through to Document.new.
    def initialize(file_object)
      self.document = Document.new(file_object)
      self.part_list = PartList.new(document, %w[document.main header footer])
      process_merge_fields
    end

    # The unique merge field names still present in the loaded parts.
    #
    # Returns an Array of the placeholder contents (duplicates removed).
    def fields
      names = Set.new
      part_list.each_part do |part|
        part.xpath('.//w:MergeField').each { |placeholder| names.add(placeholder.content) }
      end
      names.to_a
    end

    # Replace merge fields with text in every loaded part.
    #
    # replacements maps a field name to the text that should replace it;
    # fields that are not mentioned are left in place.
    def merge(replacements = {})
      part_list.each_part do |part|
        replacements.each do |field, text|
          merge_field(part, field, text)
        end
      end
    end

    # Write the merged document to filename. Any field that has not been
    # merged yet is replaced with an empty string first (see #clean_up).
    def write(filename)
      # A docx is a zip archive, so it must be written in binary mode; text
      # mode can translate line endings or transcode on some platforms.
      File.open(filename, 'wb') do |file|
        file.write(generate.string)
      end
    end

    private

    # Blank out every merge field that is still present.
    def clean_up
      remaining = fields.map { |field| [field.to_sym, ''] }
      merge(remaining.to_h)
    end

    # Build the output archive in memory: loaded parts are re-serialized from
    # their (modified) XML, every other non-directory entry is copied as is.
    #
    # Returns the buffer produced by Zip::OutputStream.write_buffer, rewound
    # to the start.
    def generate
      clean_up
      buffer = Zip::OutputStream.write_buffer do |out|
        document.entries.each do |entry|
          next if entry.ftype == :directory

          out.put_next_entry(entry.name)
          if part_list.has?(entry.name)
            # Strip the newlines left in the serialized XML. The newline must
            # be written with double quotes: '\n' in single quotes is a
            # backslash followed by "n" and never matches a line break.
            out.write part_list.get(entry.name).to_xml(indent: 0).delete("\n")
          else
            out.write entry.get_input_stream.read
          end
        end
      end
      buffer.seek(0)
      buffer
    end

    # Replace every <MergeField> placeholder named field inside part with a
    # new <r><t>text</t></r> element pair.
    #
    # NOTE(review): field is interpolated into a double-quoted XPath string,
    # so a field name containing a double quote would break the expression.
    def merge_field(part, field, text)
      part.xpath(".//w:MergeField[text()=\"#{field}\"]").each do |placeholder|
        r_elem = Nokogiri::XML::Node.new('r', part)
        t_elem = Nokogiri::XML::Node.new('t', part)
        t_elem.content = text
        t_elem.parent = r_elem
        placeholder.replace(r_elem)
      end
    end

    # replace the original convoluted tag with a simplified tag for easy searching and processing
    def process_merge_fields
      part_list.each_part do |part|
        part.root.remove_attribute('Ignorable')

        simplify_simple_fields(part)
        simplify_complex_fields(part)
      end
    end

    # Swap each <w:fldSimple> whose instr attribute is a MERGEFIELD
    # instruction for a <MergeField> element containing the field name.
    def simplify_simple_fields(part)
      part.xpath('.//w:fldSimple/..').each do |parent|
        parent.children.each do |child|
          next unless child.node_name == 'fldSimple'

          match_data = REGEXP.match(child.attribute('instr'))
          next unless match_data

          child.replace(build_placeholder(part, match_data[1]))
        end
      end
    end

    # Collapse fields spelled out as runs (fldChar "begin" ... instrText ...
    # fldChar "end") into a single <MergeField> element. The n-th instrText
    # of a parent is paired with that parent's n-th begin and end runs.
    def simplify_complex_fields(part)
      part.xpath('.//w:instrText/../..').each do |parent|
        begin_tags = parent.xpath('w:r/w:fldChar[@w:fldCharType="begin"]/..')
        end_tags = parent.xpath('w:r/w:fldChar[@w:fldCharType="end"]/..')
        instr_tags = parent.xpath('w:r/w:instrText').map(&:content)

        instr_tags.take(begin_tags.length).each_with_index do |instr, idx|
          match_data = REGEXP.match(instr)
          next unless match_data

          # Without a matching end run the range below would have no upper
          # bound, so leave such a field untouched.
          closing_run = end_tags[idx]
          next unless closing_run

          # Remove everything after the begin run up to and including the end
          # run, then turn the begin run itself into the placeholder.
          children = parent.children
          start_idx = children.index(begin_tags[idx]) + 1
          end_idx = children.index(closing_run)
          children[start_idx..end_idx].each(&:remove)

          begin_tags[idx].replace(build_placeholder(part, match_data[1]))
        end
      end
    end

    # Create the <MergeField> placeholder element holding the field name.
    def build_placeholder(part, name)
      new_tag = Nokogiri::XML::Node.new('MergeField', part)
      new_tag.content = name
      new_tag
    end
  end
end
@@ -0,0 +1,39 @@
1
module DocxTools
  # The parsed XML parts of a document, keyed by their path inside the
  # archive, selected by content type from [Content_Types].xml.
  class PartList
    # the stored list of parts (Hash of filename => parsed part)
    attr_accessor :parts

    # parse the content type entries out of the given document
    #
    # document      - object responding to #get(filename), used both to read
    #                 [Content_Types].xml and to load each matching part
    # content_types - short wordprocessingml content type names, e.g.
    #                 'document.main', 'header', 'footer'
    def initialize(document, content_types)
      self.parts = {}

      # Build the XPath queries into a new array instead of map!-ing in place,
      # so the caller's array is left untouched and frozen arrays are accepted.
      queries = content_types.map { |content_type| expand_type(content_type) }
      document.get('[Content_Types].xml').xpath(queries.join(' | ')).each do |tag|
        # drop everything up to and including the first '/' of the PartName
        filename = tag['PartName'].split('/', 2)[1]
        parts[filename] = document.get(filename)
      end
    end

    # yield each part to the block
    def each_part(&block)
      parts.values.each(&block)
    end

    # get the requested part (nil if it was not extracted)
    def get(filename)
      parts[filename]
    end

    # true if this part list has extracted this part from the document
    def has?(filename)
      parts.key?(filename)
    end

    private

    # XPath selecting the Override elements declared with the given
    # wordprocessingml content type
    def expand_type(content_type)
      "xmlns:Types/xmlns:Override[@ContentType='application/vnd.openxmlformats-officedocument.wordprocessingml.#{content_type}+xml']"
    end
  end
end
@@ -0,0 +1,4 @@
1
module DocxTools
  # The gem version, read by the gemspec. Frozen so the shared constant
  # cannot be mutated in place.
  VERSION = '0.0.1'.freeze
end
data/lib/docx_tools.rb ADDED
@@ -0,0 +1,8 @@
1
+ require 'nokogiri'
2
+ require 'set'
3
+ require 'zip'
4
+
5
+ require 'docx_tools/document'
6
+ require 'docx_tools/mail_merge'
7
+ require 'docx_tools/part_list'
8
+ require 'docx_tools/version'
metadata ADDED
@@ -0,0 +1,140 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: docx_tools
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Kevin Deisz
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.6.6
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '1.6'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.6.6
33
+ - !ruby/object:Gem::Dependency
34
+ name: rubyzip
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.1'
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 1.1.7
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: '1.1'
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 1.1.7
53
+ - !ruby/object:Gem::Dependency
54
+ name: minitest
55
+ requirement: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - "~>"
58
+ - !ruby/object:Gem::Version
59
+ version: '5.8'
60
+ type: :development
61
+ prerelease: false
62
+ version_requirements: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - "~>"
65
+ - !ruby/object:Gem::Version
66
+ version: '5.8'
67
+ - !ruby/object:Gem::Dependency
68
+ name: simplecov
69
+ requirement: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - "~>"
72
+ - !ruby/object:Gem::Version
73
+ version: '0.10'
74
+ type: :development
75
+ prerelease: false
76
+ version_requirements: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - "~>"
79
+ - !ruby/object:Gem::Version
80
+ version: '0.10'
81
+ - !ruby/object:Gem::Dependency
82
+ name: yard
83
+ requirement: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - "~>"
86
+ - !ruby/object:Gem::Version
87
+ version: '0.8'
88
+ type: :development
89
+ prerelease: false
90
+ version_requirements: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - "~>"
93
+ - !ruby/object:Gem::Version
94
+ version: '0.8'
95
+ description: An API for managing merge fields within docx files
96
+ email:
97
+ - info@trialnetworks.com
98
+ executables: []
99
+ extensions: []
100
+ extra_rdoc_files: []
101
+ files:
102
+ - ".gitignore"
103
+ - Gemfile
104
+ - Gemfile.lock
105
+ - MIT-LICENSE
106
+ - README.md
107
+ - Rakefile
108
+ - bin/console
109
+ - docx_tools.gemspec
110
+ - lib/docx_tools.rb
111
+ - lib/docx_tools/document.rb
112
+ - lib/docx_tools/mail_merge.rb
113
+ - lib/docx_tools/part_list.rb
114
+ - lib/docx_tools/version.rb
115
+ homepage: https://github.com/drugdev/docx_tools
116
+ licenses:
117
+ - MIT
118
+ metadata: {}
119
+ post_install_message:
120
+ rdoc_options: []
121
+ require_paths:
122
+ - lib
123
+ required_ruby_version: !ruby/object:Gem::Requirement
124
+ requirements:
125
+ - - ">="
126
+ - !ruby/object:Gem::Version
127
+ version: '0'
128
+ required_rubygems_version: !ruby/object:Gem::Requirement
129
+ requirements:
130
+ - - ">="
131
+ - !ruby/object:Gem::Version
132
+ version: '0'
133
+ requirements: []
134
+ rubyforge_project:
135
+ rubygems_version: 2.4.5
136
+ signing_key:
137
+ specification_version: 4
138
+ summary: Tools for manipulating docx files
139
+ test_files: []
140
+ has_rdoc: