docx_tools 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f5a2434c4452cf3eb2b6602513bbac7da765114d
4
+ data.tar.gz: 3e5d338708722a05adb60937b6ebc161dce2a2e9
5
+ SHA512:
6
+ metadata.gz: 1864672d009f8bc460c2730aabb4f9fa5818957b04c80d020cf8b0fd6e428a4272e1c1f9ef70165b93ca4a96a1d260606a31a8f897144d2a3a50a05e95cfb001
7
+ data.tar.gz: 7b904651582f0ac616d291ce0b751478fee02945c0228d0a47c481cb5da99972574df4c407b803528037aac31e522eb34129d60bf80b4a65283f14e511c470de
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ /.bundle/
2
+ /.yardoc/
3
+ /coverage/
4
+ /doc/
5
+
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,35 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ docx_tools (0.0.1)
5
+ nokogiri (~> 1.6, >= 1.6.6)
6
+ rubyzip (~> 1.1, >= 1.1.7)
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ docile (1.1.5)
12
+ json (1.8.3)
13
+ mini_portile (0.6.2)
14
+ minitest (5.8.1)
15
+ nokogiri (1.6.6.2)
16
+ mini_portile (~> 0.6.0)
17
+ rubyzip (1.1.7)
18
+ simplecov (0.10.0)
19
+ docile (~> 1.1.0)
20
+ json (~> 1.8)
21
+ simplecov-html (~> 0.10.0)
22
+ simplecov-html (0.10.0)
23
+ yard (0.8.7.6)
24
+
25
+ PLATFORMS
26
+ ruby
27
+
28
+ DEPENDENCIES
29
+ docx_tools!
30
+ minitest (~> 5.8)
31
+ simplecov (~> 0.10)
32
+ yard (~> 0.8)
33
+
34
+ BUNDLED WITH
35
+ 1.10.6
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright 2015 DrugDev, Inc.
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,3 @@
1
+ # DocxTools
2
+
3
+ This project rocks and uses MIT-LICENSE.
data/Rakefile ADDED
@@ -0,0 +1,25 @@
1
# Rake tasks for the gem: `rdoc` builds the API documentation and `test`
# (the default task) runs every *_test.rb file under test/.

# Bundler is optional here: when it is missing we print a hint and carry on
# instead of aborting before any task is defined.
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'
require 'rake/testtask'

RDoc::Task.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'DocxTools'
  rdoc.options << '--line-numbers'
  # the gem ships README.md (there is no README.rdoc), so include the file
  # that actually exists; otherwise the README is silently left out
  rdoc.rdoc_files.include('README.md')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.libs << 'test'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = false
end

task default: :test
data/bin/console ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'docx_tools'
5
+
6
+ require 'irb'
7
+ IRB.start
@@ -0,0 +1,25 @@
1
+ lib = File.expand_path('../lib', __FILE__)
2
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
+ require 'docx_tools/version'
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = 'docx_tools'
7
+ spec.version = DocxTools::VERSION
8
+ spec.authors = ['Kevin Deisz']
9
+ spec.email = ['info@trialnetworks.com']
10
+ spec.homepage = 'https://github.com/drugdev/docx_tools'
11
+
12
+ spec.summary = 'Tools for manipulating docx files'
13
+ spec.description = 'An API for managing merge fields within docx files'
14
+ spec.license = 'MIT'
15
+
16
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test)/}) }
17
+ spec.require_paths = ['lib']
18
+
19
+ spec.add_runtime_dependency 'nokogiri', '~> 1.6', '>= 1.6.6'
20
+ spec.add_runtime_dependency 'rubyzip', '~> 1.1', '>= 1.1.7'
21
+
22
+ spec.add_development_dependency 'minitest', '~> 5.8'
23
+ spec.add_development_dependency 'simplecov', '~> 0.10'
24
+ spec.add_development_dependency 'yard', '~> 0.8'
25
+ end
@@ -0,0 +1,22 @@
1
module DocxTools
  # Wraps the zip archive behind a docx file and hands out its contents.
  class Document
    # the zip archive opened from the docx file
    attr_accessor :zip

    # open the file at the given path as a zip archive and keep it
    def initialize(filepath)
      archive = Zip::File.open(filepath)
      self.zip = archive
    end

    # every entry stored in the archive
    def entries
      zip.entries
    end

    # read the named file out of the archive and parse it as XML
    def get(filename)
      raw_xml = zip.read(filename)
      Nokogiri::XML(raw_xml)
    end
  end
end
@@ -0,0 +1,111 @@
1
module DocxTools
  # Mail merge for docx files.
  #
  # On construction, every MERGEFIELD found in the main document, header and
  # footer parts is rewritten into a simplified MergeField element whose
  # content is the field name. `fields` lists those names, `merge` replaces
  # them with text and `write` saves the resulting docx.
  class MailMerge
    # matches the instruction text of a merge field; capture 1 is the field name
    REGEXP = / MERGEFIELD "?([^ ]+?)"? (| \\\* MERGEFORMAT )/i.freeze

    # the wrapped document and the list of parts that may hold merge fields
    attr_accessor :document, :part_list

    # open the docx and simplify its merge fields so they are easy to query
    def initialize(file_object)
      self.document = Document.new(file_object)
      self.part_list = PartList.new(document, %w[document.main header footer])
      process_merge_fields
    end

    # the unique names of the merge fields that have not been merged yet
    def fields
      names = Set.new
      part_list.each_part do |part|
        part.xpath('.//w:MergeField').each { |mf| names.add(mf.content) }
      end
      names.to_a
    end

    # replace each field named by a key of +replacements+ with its value
    def merge(replacements = {})
      part_list.each_part do |part|
        replacements.each { |field, text| merge_field(part, field, text) }
      end
    end

    # write the merged docx to +filename+; fields that were never merged are
    # blanked out first (see clean_up)
    def write(filename)
      # a docx is a zip archive, so it must be written in binary mode: text
      # mode may translate line endings and corrupt the archive
      File.open(filename, 'wb') { |file| file.write(generate.string) }
    end

    private

    # merge an empty string into every field that is still present
    def clean_up
      remaining = fields.map { |field| [field.to_sym, ''] }
      merge(remaining.to_h)
    end

    # build the output archive in memory and return the rewound buffer
    def generate
      clean_up
      buffer = Zip::OutputStream.write_buffer do |out|
        document.entries.each do |entry|
          next if entry.ftype == :directory

          out.put_next_entry(entry.name)
          out.write(entry_payload(entry))
        end
      end
      buffer.seek(0)
      buffer
    end

    # the bytes to store for +entry+: the (possibly merged) XML for parts held
    # in the part list, the untouched original content for everything else
    def entry_payload(entry)
      return entry.get_input_stream.read unless part_list.has?(entry.name)

      # strip real newline characters from the serialized XML; the previous
      # single-quoted '\n' matched a literal backslash followed by "n", which
      # left the newlines in place and mangled text such as "C:\new"
      part_list.get(entry.name).to_xml(indent: 0).delete("\n")
    end

    # replace every MergeField named +field+ within +part+ with a run holding +text+
    def merge_field(part, field, text)
      wanted = field.to_s
      # compare names in Ruby rather than interpolating them into the XPath
      # string, so a name containing a double quote cannot break the query
      matching = part.xpath('.//w:MergeField').select { |mf| mf.content == wanted }
      matching.each do |merge_field|
        r_elem = Nokogiri::XML::Node.new('r', part)
        t_elem = Nokogiri::XML::Node.new('t', part)
        t_elem.content = text
        t_elem.parent = r_elem
        merge_field.replace(r_elem)
      end
    end

    # replace the original convoluted tags with a simplified tag for easy searching and processing
    def process_merge_fields
      part_list.each_part do |part|
        part.root.remove_attribute('Ignorable')
        simplify_simple_fields(part)
        simplify_complex_fields(part)
      end
    end

    # swap each fldSimple element carrying a MERGEFIELD instruction for a MergeField element
    def simplify_simple_fields(part)
      part.xpath('.//w:fldSimple/..').each do |parent|
        parent.children.each do |child|
          next unless child.node_name == 'fldSimple'

          match_data = REGEXP.match(child.attribute('instr'))
          next unless match_data

          child.replace(merge_field_tag(part, match_data[1]))
        end
      end
    end

    # collapse each begin ... instrText ... end run sequence that carries a
    # MERGEFIELD instruction into a single MergeField element
    def simplify_complex_fields(part)
      part.xpath('.//w:instrText/../..').each do |parent|
        begin_tags = parent.xpath('w:r/w:fldChar[@w:fldCharType="begin"]/..')
        end_tags = parent.xpath('w:r/w:fldChar[@w:fldCharType="end"]/..')
        instr_tags = parent.xpath('w:r/w:instrText').map(&:content)

        instr_tags.take(begin_tags.length).each_with_index do |instr, idx|
          match_data = REGEXP.match(instr)
          next unless match_data

          # skip a field whose end tag is missing instead of crashing or
          # deleting everything up to the end of the parent
          closing = end_tags[idx]
          next unless closing

          children = parent.children
          begin_idx = children.index(begin_tags[idx])
          end_idx = children.index(closing)
          # either tag may already have been removed by an earlier iteration
          next unless begin_idx && end_idx

          # drop everything after the begin run up to and including the end
          # run, then turn the begin run itself into the placeholder
          children[(begin_idx + 1)..end_idx].each(&:remove)
          begin_tags[idx].replace(merge_field_tag(part, match_data[1]))
        end
      end
    end

    # build a MergeField element for +part+ whose content is the field name
    def merge_field_tag(part, name)
      tag = Nokogiri::XML::Node.new('MergeField', part)
      tag.content = name
      tag
    end
  end
end
@@ -0,0 +1,39 @@
1
module DocxTools
  # The parts of a document whose content type is one of the requested
  # wordprocessingml types, keyed by their path inside the archive.
  class PartList
    # the stored list of parts (filename => value returned by document.get)
    attr_accessor :parts

    # parse the content type entries out of the given document
    #
    # document      - object responding to get(filename)
    # content_types - short type names such as 'document.main' or 'header';
    #                 the array is read but never modified
    def initialize(document, content_types)
      self.parts = {}
      # nothing requested: avoid evaluating an empty XPath expression
      return if content_types.empty?

      # map (not map!) so the caller's array is left untouched and a frozen
      # array can be passed in
      query = content_types.map { |type| expand_type(type) }.join(' | ')
      document.get('[Content_Types].xml').xpath(query).each do |tag|
        # PartName is written with a leading slash ("/word/document.xml");
        # drop it to get the path used inside the archive
        filename = tag['PartName'].split('/', 2)[1]
        parts[filename] = document.get(filename)
      end
    end

    # yield each part to the block
    def each_part(&block)
      parts.values.each(&block)
    end

    # get the requested part (nil when it was not extracted)
    def get(filename)
      parts[filename]
    end

    # true if this part list has extracted this part from the document
    def has?(filename)
      parts.key?(filename)
    end

    private

    # XPath selecting the Override entries declared with the given short type
    def expand_type(content_type)
      "xmlns:Types/xmlns:Override[@ContentType='application/vnd.openxmlformats-officedocument.wordprocessingml.#{content_type}+xml']"
    end
  end
end
@@ -0,0 +1,4 @@
1
+ module DocxTools
2
+
3
+ VERSION = '0.0.1'
4
+ end
data/lib/docx_tools.rb ADDED
@@ -0,0 +1,8 @@
1
+ require 'nokogiri'
2
+ require 'set'
3
+ require 'zip'
4
+
5
+ require 'docx_tools/document'
6
+ require 'docx_tools/mail_merge'
7
+ require 'docx_tools/part_list'
8
+ require 'docx_tools/version'
metadata ADDED
@@ -0,0 +1,140 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: docx_tools
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Kevin Deisz
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.6.6
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '1.6'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.6.6
33
+ - !ruby/object:Gem::Dependency
34
+ name: rubyzip
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.1'
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 1.1.7
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: '1.1'
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 1.1.7
53
+ - !ruby/object:Gem::Dependency
54
+ name: minitest
55
+ requirement: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - "~>"
58
+ - !ruby/object:Gem::Version
59
+ version: '5.8'
60
+ type: :development
61
+ prerelease: false
62
+ version_requirements: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - "~>"
65
+ - !ruby/object:Gem::Version
66
+ version: '5.8'
67
+ - !ruby/object:Gem::Dependency
68
+ name: simplecov
69
+ requirement: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - "~>"
72
+ - !ruby/object:Gem::Version
73
+ version: '0.10'
74
+ type: :development
75
+ prerelease: false
76
+ version_requirements: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - "~>"
79
+ - !ruby/object:Gem::Version
80
+ version: '0.10'
81
+ - !ruby/object:Gem::Dependency
82
+ name: yard
83
+ requirement: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - "~>"
86
+ - !ruby/object:Gem::Version
87
+ version: '0.8'
88
+ type: :development
89
+ prerelease: false
90
+ version_requirements: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - "~>"
93
+ - !ruby/object:Gem::Version
94
+ version: '0.8'
95
+ description: An API for managing merge fields within docx files
96
+ email:
97
+ - info@trialnetworks.com
98
+ executables: []
99
+ extensions: []
100
+ extra_rdoc_files: []
101
+ files:
102
+ - ".gitignore"
103
+ - Gemfile
104
+ - Gemfile.lock
105
+ - MIT-LICENSE
106
+ - README.md
107
+ - Rakefile
108
+ - bin/console
109
+ - docx_tools.gemspec
110
+ - lib/docx_tools.rb
111
+ - lib/docx_tools/document.rb
112
+ - lib/docx_tools/mail_merge.rb
113
+ - lib/docx_tools/part_list.rb
114
+ - lib/docx_tools/version.rb
115
+ homepage: https://github.com/drugdev/docx_tools
116
+ licenses:
117
+ - MIT
118
+ metadata: {}
119
+ post_install_message:
120
+ rdoc_options: []
121
+ require_paths:
122
+ - lib
123
+ required_ruby_version: !ruby/object:Gem::Requirement
124
+ requirements:
125
+ - - ">="
126
+ - !ruby/object:Gem::Version
127
+ version: '0'
128
+ required_rubygems_version: !ruby/object:Gem::Requirement
129
+ requirements:
130
+ - - ">="
131
+ - !ruby/object:Gem::Version
132
+ version: '0'
133
+ requirements: []
134
+ rubyforge_project:
135
+ rubygems_version: 2.4.5
136
+ signing_key:
137
+ specification_version: 4
138
+ summary: Tools for manipulating docx files
139
+ test_files: []
140
+ has_rdoc: