docx 0.4.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9b2cc086dcc2bb5043cc9f20b311f1e4b02f7c9730f459c8b91e48097d8bacb5
4
- data.tar.gz: 8536619dd3cebe92112b7328cf64cdca40be898411d974037d5e9ffc02af1a08
3
+ metadata.gz: 2a33dcd9e31c60144261a15670cd1c01b37877044aaf33f7091b46cc85ab3412
4
+ data.tar.gz: 1911db027b3e2fbf8eb9363eaa99b95231f875afe8de2a0c060b38fee86c7238
5
5
  SHA512:
6
- metadata.gz: 77a0ee6f32422ad64e916b98d52d685958867e574582252d4ce424fac721cee4f73b400f8960ad88c762608024bfa7a5d966bea0cd65e1daecdb4d58967da307
7
- data.tar.gz: e87937fb28f2c6f7d7777b63df9b483fde5670e98261253b91ed55fee4d737f72cc2596542ced45a80ba525a18b9ca7a1760f76440ad71ed30facdfe17fd0cc1
6
+ metadata.gz: 0a823fedf1b0bfc542c88533c787aefa5e9cee12ae4422a01f106985a2f03e8b636aa70820a179f5501ba8f996672f8f75e24356282cd7d81505cbb1984fa967
7
+ data.tar.gz: c06b4078536bd8b12b5c5e4f61fe0ef5f2f9e03e9cbc7c4017f2d01d3e8a0bc9200fa7457fac54c0b110a57ddc4bff8ad74c28d3e873512900266ac00137c6e6
data/README.md CHANGED
@@ -1,12 +1,17 @@
1
1
  # docx
2
2
 
3
+ [![Gem Version](https://badge.fury.io/rb/docx.svg)](https://badge.fury.io/rb/docx)
4
+ [![Ruby](https://github.com/ruby-docx/docx/workflows/Ruby/badge.svg)](https://github.com/ruby-docx/docx/actions?query=workflow%3ARuby)
5
+ [![Coverage Status](https://coveralls.io/repos/github/ruby-docx/docx/badge.svg?branch=master)](https://coveralls.io/github/ruby-docx/docx?branch=master)
6
+ [![Gitter](https://badges.gitter.im/ruby-docx/community.svg)](https://gitter.im/ruby-docx/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
7
+
3
8
  A Ruby library/gem for interacting with `.docx` files. Current capabilities include reading paragraphs/bookmarks, inserting text at bookmarks, reading tables/rows/columns/cells, and saving the document.
4
9
 
5
10
  ## Usage
6
11
 
7
12
  ### Prerequisites
8
13
 
9
- - Ruby 2.4 or later
14
+ - Ruby 2.5 or later
10
15
 
11
16
  ### Install
12
17
 
@@ -47,6 +52,17 @@ doc.bookmarks.each_pair do |bookmark_name, bookmark_object|
47
52
  end
48
53
  ```
49
54
 
55
+ Don't have a local file but a buffer? Docx handles those too:
56
+
57
+ ```ruby
58
+ require 'docx'
59
+
60
+ # Create a Docx::Document object from an in-memory buffer (e.g. the contents of a remote file)
61
+ doc = Docx::Document.open(buffer)
62
+
63
+ # Everything about reading is the same as shown above
64
+ ```
65
+
50
66
  ### Rendering html
51
67
  ``` ruby
52
68
  require 'docx'
@@ -118,6 +134,35 @@ end
118
134
  doc.save('example-edited.docx')
119
135
  ```
120
136
 
137
+ ### Writing to tables
138
+
139
+ ``` ruby
140
+ require 'docx'
141
+
142
+ # Create a Docx::Document object for our existing docx file
143
+ doc = Docx::Document.open('tables.docx')
144
+
145
+ # Iterate over each table
146
+ doc.tables.each do |table|
147
+ last_row = table.rows.last
148
+
149
+ # Copy last row and insert a new one before last row
150
+ new_row = last_row.copy
151
+ new_row.insert_before(last_row)
152
+
153
+ # Substitute text in each cell of this new row
154
+ new_row.cells.each do |cell|
155
+ cell.paragraphs.each do |paragraph|
156
+ paragraph.each_text_run do |text|
157
+ text.substitute('_placeholder_', 'replacement value')
158
+ end
159
+ end
160
+ end
161
+ end
162
+
163
+ doc.save('tables-edited.docx')
164
+ ```
165
+
121
166
  ### Advanced
122
167
 
123
168
  ``` ruby
@@ -55,7 +55,7 @@ module Docx
55
55
 
56
56
  # Array of text runs contained within paragraph
57
57
  def text_runs
58
- @node.xpath('w:r|w:hyperlink/w:r').map { |r_node| Containers::TextRun.new(r_node, @document_properties) }
58
+ @node.xpath('w:r|w:hyperlink').map { |r_node| Containers::TextRun.new(r_node, @document_properties) }
59
59
  end
60
60
 
61
61
  # Iterate over each text run within a paragraph
@@ -23,6 +23,8 @@ module Docx
23
23
  def initialize(node, document_properties = {})
24
24
  @node = node
25
25
  @text_nodes = @node.xpath('w:t').map {|t_node| Elements::Text.new(t_node) }
26
+ @text_nodes = @node.xpath('w:t|w:r/w:t').map {|t_node| Elements::Text.new(t_node) }
27
+
26
28
  @properties_tag = 'rPr'
27
29
  @text = parse_text || ''
28
30
  @formatting = parse_formatting || DEFAULT_FORMATTING
@@ -74,6 +76,7 @@ module Docx
74
76
  # No need to be granular with font size down to the span level if it doesn't vary.
75
77
  styles['font-size'] = "#{font_size}pt" if font_size != @font_size
76
78
  html = html_tag(:span, content: html, styles: styles) unless styles.empty?
79
+ html = html_tag(:a, content: html, attributes: {href: href, target: "_blank"}) if hyperlink?
77
80
  return html
78
81
  end
79
82
 
@@ -89,6 +92,18 @@ module Docx
89
92
  @formatting[:underline]
90
93
  end
91
94
 
95
+ def hyperlink?
96
+ @node.name == 'hyperlink'
97
+ end
98
+
99
+ def href
100
+ @document_properties[:hyperlinks][hyperlink_id]
101
+ end
102
+
103
+ def hyperlink_id
104
+ @node.attributes['id'].value
105
+ end
106
+
92
107
  def font_size
93
108
  size_tag = @node.xpath('w:rPr//w:sz').first
94
109
  size_tag ? size_tag.attributes['val'].value.to_i / 2 : @font_size
data/lib/docx/document.rb CHANGED
@@ -20,34 +20,41 @@ module Docx
20
20
  class Document
21
21
  attr_reader :xml, :doc, :zip, :styles
22
22
 
23
- def initialize(path, &block)
23
+ def initialize(path_or_io, options = {})
24
24
  @replace = {}
25
- @zip = Zip::File.open(path)
26
- @document_xml = @zip.read('word/document.xml')
27
- @doc = Nokogiri::XML(@document_xml)
28
- @styles_xml = @zip.read('word/styles.xml')
29
- @styles = Nokogiri::XML(@styles_xml)
30
- if block_given?
31
- yield self
32
- @zip.close
25
+
26
+ # if path_or_io is a String and does not contain a null byte, treat it as a file path
27
+ if (path_or_io.instance_of?(String) && !/\u0000/.match?(path_or_io))
28
+ @zip = Zip::File.open(path_or_io)
29
+ else
30
+ @zip = Zip::File.open_buffer(path_or_io)
33
31
  end
34
- end
35
32
 
33
+ document = @zip.glob('word/document*.xml').first
34
+ raise Errno::ENOENT if document.nil?
35
+
36
+ @document_xml = document.get_input_stream.read
37
+ @doc = Nokogiri::XML(@document_xml)
38
+ load_styles
39
+ yield(self) if block_given?
40
+ ensure
41
+ @zip.close
42
+ end
36
43
 
37
44
  # This stores the current global document properties, for now
38
45
  def document_properties
39
46
  {
40
- font_size: font_size
47
+ font_size: font_size,
48
+ hyperlinks: hyperlinks
41
49
  }
42
50
  end
43
51
 
44
-
45
52
  # With no associated block, Docx::Document.open is a synonym for Docx::Document.new. If the optional code block is given, it will be passed the opened +docx+ file as an argument and the Docx::Document object will automatically be closed when the block terminates. The value of the block will be returned from Docx::Document.open.
46
53
  # call-seq:
47
54
  # open(filepath) => file
48
55
  # open(filepath) {|file| block } => obj
49
56
  def self.open(path, &block)
50
- self.new(path, &block)
57
+ new(path, &block)
51
58
  end
52
59
 
53
60
  def paragraphs
@@ -55,11 +62,11 @@ module Docx
55
62
  end
56
63
 
57
64
  def bookmarks
58
- bkmrks_hsh = Hash.new
65
+ bkmrks_hsh = {}
59
66
  bkmrks_ary = @doc.xpath('//w:bookmarkStart').map { |b_node| parse_bookmark_from b_node }
60
67
  # auto-generated by office 2010
61
- bkmrks_ary.reject! {|b| b.name == "_GoBack" }
62
- bkmrks_ary.each {|b| bkmrks_hsh[b.name] = b }
68
+ bkmrks_ary.reject! { |b| b.name == '_GoBack' }
69
+ bkmrks_ary.each { |b| bkmrks_hsh[b.name] = b }
63
70
  bkmrks_hsh
64
71
  end
65
72
 
@@ -70,10 +77,23 @@ module Docx
70
77
  # Some documents have this set, others don't.
71
78
  # Values are returned as half-points, so to get points, that's why it's divided by 2.
72
79
  def font_size
80
+ return nil unless @styles
81
+
73
82
  size_tag = @styles.xpath('//w:docDefaults//w:rPrDefault//w:rPr//w:sz').first
74
83
  size_tag ? size_tag.attributes['val'].value.to_i / 2 : nil
75
84
  end
76
85
 
86
+ # Hyperlink targets are extracted from the document.xml.rels file
87
+ def hyperlinks
88
+ hyperlink_relationships.each_with_object({}) do |rel, hash|
89
+ hash[rel.attributes['Id'].value] = rel.attributes['Target'].value
90
+ end
91
+ end
92
+
93
+ def hyperlink_relationships
94
+ @rels.xpath("//xmlns:Relationship[contains(@Type,'hyperlink')]")
95
+ end
96
+
77
97
  ##
78
98
  # *Deprecated*
79
99
  #
@@ -92,7 +112,7 @@ module Docx
92
112
 
93
113
  # Output entire document as a String HTML fragment
94
114
  def to_html
95
- paragraphs.map(&:to_html).join('\n')
115
+ paragraphs.map(&:to_html).join("\n")
96
116
  end
97
117
 
98
118
  # Save document to provided path
@@ -103,6 +123,7 @@ module Docx
103
123
  Zip::OutputStream.open(path) do |out|
104
124
  zip.each do |entry|
105
125
  next unless entry.file?
126
+
106
127
  out.put_next_entry(entry.name)
107
128
 
108
129
  if @replace[entry.name]
@@ -115,7 +136,28 @@ module Docx
115
136
  zip.close
116
137
  end
117
138
 
118
- alias_method :text, :to_s
139
+ # Output entire document as a StringIO object
140
+ def stream
141
+ update
142
+ stream = Zip::OutputStream.write_buffer do |out|
143
+ zip.each do |entry|
144
+ next unless entry.file?
145
+
146
+ out.put_next_entry(entry.name)
147
+
148
+ if @replace[entry.name]
149
+ out.write(@replace[entry.name])
150
+ else
151
+ out.write(zip.read(entry.name))
152
+ end
153
+ end
154
+ end
155
+
156
+ stream.rewind
157
+ stream
158
+ end
159
+
160
+ alias text to_s
119
161
 
120
162
  def replace_entry(entry_path, file_contents)
121
163
  @replace[entry_path] = file_contents
@@ -123,13 +165,30 @@ module Docx
123
165
 
124
166
  private
125
167
 
168
+ def load_styles
169
+ @styles_xml = @zip.read('word/styles.xml')
170
+ @styles = Nokogiri::XML(@styles_xml)
171
+ load_rels
172
+ rescue Errno::ENOENT => e
173
+ warn e.message
174
+ nil
175
+ end
176
+
177
+ def load_rels
178
+ rels_entry = @zip.glob('word/_rels/document*.xml.rels').first
179
+ raise Errno::ENOENT unless rels_entry
180
+
181
+ @rels_xml = rels_entry.get_input_stream.read
182
+ @rels = Nokogiri::XML(@rels_xml)
183
+ end
184
+
126
185
  #--
127
186
  # TODO: Flesh this out to be compatible with other files
128
187
  # TODO: Method to set flag on files that have been edited, probably by inserting something at the
129
188
  # end of methods that make edits?
130
189
  #++
131
190
  def update
132
- replace_entry "word/document.xml", doc.serialize(:save_with => 0)
191
+ replace_entry 'word/document.xml', doc.serialize(save_with: 0)
133
192
  end
134
193
 
135
194
  # generate Elements::Containers::Paragraph from paragraph XML node
@@ -5,7 +5,7 @@ module Docx
5
5
  class Bookmark
6
6
  include Element
7
7
  attr_accessor :name
8
-
8
+
9
9
  def self.tag
10
10
  'bookmarkStart'
11
11
  end
@@ -17,14 +17,14 @@ module Docx
17
17
 
18
18
  # Insert text before bookmarkStart node
19
19
  def insert_text_before(text)
20
- text_run = get_run_after
21
- text_run.text = "#{text}#{text_run.text}"
20
+ text_run = get_run_before
21
+ text_run.text = "#{text_run.text}#{text}"
22
22
  end
23
23
 
24
24
  # Insert text after bookmarkStart node
25
25
  def insert_text_after(text)
26
- text_run = get_run_before
27
- text_run.text = "#{text_run.text}#{text}"
26
+ text_run = get_run_after
27
+ text_run.text = "#{text}#{text_run.text}"
28
28
  end
29
29
 
30
30
  # insert multiple lines starting with paragraph containing bookmark node.
@@ -51,7 +51,7 @@ module Docx
51
51
 
52
52
  # Get text run immediately prior to bookmark node
53
53
  def get_run_before
54
- # at_xpath returns the first match found and preceding-sibling returns siblings in the
54
+ # at_xpath returns the first match found and preceding-sibling returns siblings in the
55
55
  # order they appear in the document not the order as they appear when moving out from
56
56
  # the starting node
57
57
  if not (r_nodes = @node.xpath("./preceding-sibling::w:r")).empty?
@@ -65,8 +65,10 @@ module Docx
65
65
  def html_tag(name, options = {})
66
66
  content = options[:content]
67
67
  styles = options[:styles]
68
+ attributes = options[:attributes]
68
69
 
69
70
  html = "<#{name.to_s}"
71
+
70
72
  unless styles.nil? || styles.empty?
71
73
  styles_array = []
72
74
  styles.each do |property, value|
@@ -74,6 +76,13 @@ module Docx
74
76
  end
75
77
  html << " style=\"#{styles_array.join('')}\""
76
78
  end
79
+
80
+ unless attributes.nil? || attributes.empty?
81
+ attributes.each do |attr_name, attr_value|
82
+ html << " #{attr_name}=\"#{attr_value}\""
83
+ end
84
+ end
85
+
77
86
  html << ">"
78
87
  html << content if content
79
88
  html << "</#{name.to_s}>"
data/lib/docx/version.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Docx #:nodoc:
2
- VERSION = '0.4.0'
4
+ VERSION = '0.6.2'
3
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: docx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.6.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Hunt
@@ -9,10 +9,10 @@ authors:
9
9
  - Higgins Dragon
10
10
  - Toms Mikoss
11
11
  - Sebastian Wittenkamp
12
- autorequire:
12
+ autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2020-02-09 00:00:00.000000000 Z
15
+ date: 2021-07-21 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: nokogiri
@@ -49,19 +49,19 @@ dependencies:
49
49
  - !ruby/object:Gem::Version
50
50
  version: '2.0'
51
51
  - !ruby/object:Gem::Dependency
52
- name: rspec
52
+ name: coveralls_reborn
53
53
  requirement: !ruby/object:Gem::Requirement
54
54
  requirements:
55
55
  - - "~>"
56
56
  - !ruby/object:Gem::Version
57
- version: '3.7'
57
+ version: '0.21'
58
58
  type: :development
59
59
  prerelease: false
60
60
  version_requirements: !ruby/object:Gem::Requirement
61
61
  requirements:
62
62
  - - "~>"
63
63
  - !ruby/object:Gem::Version
64
- version: '3.7'
64
+ version: '0.21'
65
65
  - !ruby/object:Gem::Dependency
66
66
  name: rake
67
67
  requirement: !ruby/object:Gem::Requirement
@@ -76,6 +76,20 @@ dependencies:
76
76
  - - "~>"
77
77
  - !ruby/object:Gem::Version
78
78
  version: '13.0'
79
+ - !ruby/object:Gem::Dependency
80
+ name: rspec
81
+ requirement: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - "~>"
84
+ - !ruby/object:Gem::Version
85
+ version: '3.7'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - "~>"
91
+ - !ruby/object:Gem::Version
92
+ version: '3.7'
79
93
  description: thin wrapper around rubyzip and nokogiri as a way to get started with
80
94
  docx files
81
95
  email:
@@ -106,7 +120,7 @@ homepage: https://github.com/chrahunt/docx
106
120
  licenses:
107
121
  - MIT
108
122
  metadata: {}
109
- post_install_message:
123
+ post_install_message:
110
124
  rdoc_options: []
111
125
  require_paths:
112
126
  - lib
@@ -114,15 +128,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
114
128
  requirements:
115
129
  - - ">="
116
130
  - !ruby/object:Gem::Version
117
- version: 2.4.0
131
+ version: 2.5.0
118
132
  required_rubygems_version: !ruby/object:Gem::Requirement
119
133
  requirements:
120
134
  - - ">="
121
135
  - !ruby/object:Gem::Version
122
136
  version: '0'
123
137
  requirements: []
124
- rubygems_version: 3.1.2
125
- signing_key:
138
+ rubygems_version: 3.1.6
139
+ signing_key:
126
140
  specification_version: 4
127
141
  summary: a ruby library/gem for interacting with .docx files
128
142
  test_files: []