lm_docstache 3.0.4 → 3.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ce8fea2c12829636bd22622e1c022cf2ead4ec09997b7e13322f2b94b4261654
4
- data.tar.gz: a25d02153cb1a53bf74111dc59710d83a133a821c4632607c60e06531b28b4ad
3
+ metadata.gz: cc1a3839b3cfabfd78b144d3f2862aa4a8bf1650bc4037220b2e5af4feaa4a31
4
+ data.tar.gz: d573c864a49f7e2dcc07122fc242664b597a588253408f065ad94cb0f2823f0a
5
5
  SHA512:
6
- metadata.gz: 99bcdac9eea8b1d62e0733b692c600be5e8dcb88710f58d22299b6daa7eabab5f5b01eeb83e61668709655869f6580e918ef38d11bb5ae781db4462b38abba65
7
- data.tar.gz: ea96603b65d984edfeeb9aa9f9b760c9665af2c5df163e4c3dae076b24ebc10fccb8e1bdee1cc6e01761496a4b89b7454e80965d6a24e96eb67b53b17e46be84
6
+ metadata.gz: 7b0fb9ff483de6b2e4315206d1961f3ff847612519ed7ff11203f8ca9e7aabd35fc8a5f8730ff552c171082a0d29d2a1a126f86e89a3a117e3a10ab0a5fb5222
7
+ data.tar.gz: a8c510d591b10ee2d66e6c3ac85995ee9eadb8d91f2178c127e70c12b4184d840cfa725929d4d02b40e62b2e2f9f2c7d629bf65866164833e421a5fdbb5e5c7b
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Changelog
2
2
 
3
+ ## 3.0.5
4
+
5
+ #### Bug fixes and improvements
6
+
7
+ * Improve the way broken tags are detected, making the algorithm wider in terms
8
+ of detecting broken tags, especially if the broken tag is the opening part of
9
+ conditional tag blocks (which was not being detected before these improvements).
10
+ * Improve the way the paragraphs with "unusable" tags are traversed and have
11
+ their same-style texts merged (hence the "unusable" tags becoming usable). So,
12
+ from now on, `w:hyperlink` elements, for instance, are properly processed as
13
+ well.
14
+
3
15
  ## 3.0.4
4
16
  * Allow the replacement `data` argument to be an `Array`. This feature allows blocks to be replaced
5
17
  in sequential order, following the order of the matching blocks.
data/lib/lm_docstache.rb CHANGED
@@ -1,9 +1,9 @@
1
1
  require 'nokogiri'
2
2
  require 'zip'
3
3
  require "lm_docstache/version"
4
+ require "lm_docstache/parser"
4
5
  require "lm_docstache/document"
5
6
  require 'lm_docstache/hide_custom_tags'
6
- require "lm_docstache/parser"
7
7
  require "lm_docstache/condition"
8
8
  require "lm_docstache/conditional_block"
9
9
  require "lm_docstache/renderer"
@@ -1,7 +1,10 @@
1
1
  module LMDocstache
2
2
  class Document
3
- TAGS_REGEXP = /{{.+?}}/
3
+ WHOLE_BLOCK_START_REGEX = /^#{Parser::BLOCK_START_PATTERN}$/
4
+ GENERAL_TAG_REGEX = /\{\{[\/#^]?(.+?)(?:(\s((?:==|~=))\s?.+?))?\}\}/
4
5
  ROLES_REGEXP = /({{(sig|sigfirm|date|check|text|initial)\|(req|noreq)\|(.+?)}})/
6
+ BLOCK_CHILDREN_ELEMENTS = 'w|r,w|hyperlink,w|ins,w|del'
7
+ RUN_LIKE_ELEMENTS = 'w|r,w|ins'
5
8
 
6
9
  def initialize(*paths)
7
10
  raise ArgumentError if paths.empty?
@@ -34,38 +37,48 @@ module LMDocstache
34
37
 
35
38
  def tags
36
39
  @documents.values.flat_map do |document|
37
- document.text.strip.scan(TAGS_REGEXP)
40
+ document_text = document.text
41
+ extract_tag_names(document_text) + extract_tag_names(document_text, true)
38
42
  end
39
43
  end
40
44
 
41
45
  def usable_tags
42
46
  @documents.values.reduce([]) do |tags, document|
43
47
  document.css('w|t').reduce(tags) do |document_tags, text_node|
44
- document_tags.push(*text_node.text.scan(TAGS_REGEXP))
48
+ text = text_node.text
49
+ document_tags.push(*extract_tag_names(text))
50
+ document_tags.push(*extract_tag_names(text, true))
45
51
  end
46
52
  end
47
53
  end
48
54
 
49
55
  def usable_tag_names
50
- usable_tags.reject { |tag| tag =~ ROLES_REGEXP }.map do |tag|
51
- tag.scan(/\{\{[\/#^]?(.+?)(?:(\s((?:==|~=))\s?.+?))?\}\}/)
52
- $1
56
+ usable_tags.reduce([]) do |memo, tag|
57
+ next memo if !tag.is_a?(Regexp) && tag =~ ROLES_REGEXP
58
+
59
+ tag = tag.source if tag.is_a?(Regexp)
60
+ memo << (tag.scan(GENERAL_TAG_REGEX) && $1)
53
61
  end.compact.uniq
54
62
  end
55
63
 
56
64
  def unusable_tags
57
- unusable_tags = tags
65
+ conditional_start_tags = text_nodes_containing_only_starting_conditionals.map(&:text)
66
+
67
+ usable_tags.reduce(tags) do |broken_tags, usable_tag|
68
+ broken_tags.delete_at(broken_tags.index(usable_tag)) && broken_tags
69
+ end.reject do |broken_tag|
70
+ operator = broken_tag.is_a?(Regexp) ? :=~ : :==
71
+ start_tags_index = conditional_start_tags.find_index do |start_tag|
72
+ broken_tag.send(operator, start_tag)
73
+ end
58
74
 
59
- usable_tags.each do |usable_tag|
60
- index = unusable_tags.index(usable_tag)
61
- unusable_tags.delete_at(index) if index
75
+ conditional_start_tags.delete_at(start_tags_index) if start_tags_index
76
+ !!start_tags_index
62
77
  end
63
-
64
- unusable_tags
65
78
  end
66
79
 
67
80
  def fix_errors
68
- problem_paragraphs.each { |pg| flatten_paragraph(pg) if pg }
81
+ problem_paragraphs.each { |pg| flatten_text_blocks(pg) if pg }
69
82
  end
70
83
 
71
84
  def errors?
@@ -99,6 +112,25 @@ module LMDocstache
99
112
 
100
113
  private
101
114
 
115
+ def text_nodes_containing_only_starting_conditionals
116
+ @documents.values.flat_map do |document|
117
+ document.css('w|t').select do |paragraph|
118
+ paragraph.text =~ WHOLE_BLOCK_START_REGEX
119
+ end
120
+ end
121
+ end
122
+
123
+ def extract_tag_names(text, conditional_tag = false)
124
+ if conditional_tag
125
+ text.scan(Parser::BLOCK_MATCHER).map do |match|
126
+ start_block_tag = "{{#{match[0]}#{match[1]} #{match[2]} #{match[3]}}}"
127
+ /#{Regexp.escape(start_block_tag)}/
128
+ end
129
+ else
130
+ text.scan(Parser::VARIABLE_MATCHER).map { |match| "{{#{match[0]}}}" }
131
+ end
132
+ end
133
+
102
134
  def render_documents(data, text = nil, render_options = {})
103
135
  Hash[
104
136
  @documents.map do |(path, document)|
@@ -115,41 +147,48 @@ module LMDocstache
115
147
  def problem_paragraphs
116
148
  unusable_tags.flat_map do |tag|
117
149
  @documents.values.inject([]) do |tags, document|
118
- faulty_paragraphs = document
119
- .css('w|p')
120
- .select { |paragraph| paragraph.text =~ /#{Regexp.escape(tag)}/ }
150
+ faulty_paragraphs = document.css('w|p').select do |paragraph|
151
+ tag_regex = tag.is_a?(Regexp) ? tag : /#{Regexp.escape(tag)}/
152
+ paragraph.text =~ tag_regex
153
+ end
121
154
 
122
155
  tags + faulty_paragraphs
123
156
  end
124
157
  end
125
158
  end
126
159
 
127
- def flatten_paragraph(paragraph)
128
- return if (run_nodes = paragraph.css('w|r')).size < 2
160
+ def flatten_text_blocks(runs_wrapper)
161
+ return if (children = filtered_children(runs_wrapper)).size < 2
162
+
163
+ while node = children.pop
164
+ is_run_node = node.matches?(RUN_LIKE_ELEMENTS)
165
+ previous_node = children.last
129
166
 
130
- while run_node = run_nodes.pop
131
- next if run_nodes.empty?
167
+ if !is_run_node && filtered_children(node, RUN_LIKE_ELEMENTS).any?
168
+ next flatten_text_blocks(node)
169
+ end
170
+ next if !is_run_node || children.empty? || !previous_node.matches?(RUN_LIKE_ELEMENTS)
171
+ next if node.at_css('w|tab') || previous_node.at_css('w|tab')
132
172
 
133
- style_node = run_node.at_css('w|rPr')
173
+ style_node = node.at_css('w|rPr')
134
174
  style_html = style_node ? style_node.inner_html : ''
135
- previous_run_node = run_nodes.last
136
- previous_style_node = previous_run_node.at_css('w|rPr')
175
+ previous_style_node = previous_node.at_css('w|rPr')
137
176
  previous_style_html = previous_style_node ? previous_style_node.inner_html : ''
138
- previous_text_node = previous_run_node.at_css('w|t')
139
- current_text_node = run_node.at_css('w|t')
140
-
141
- # avoid to merge blocks with tabs
142
- next if run_node.at_css('w|tab')
143
- next if previous_run_node.at_css('w|tab')
177
+ previous_text_node = previous_node.at_css('w|t')
178
+ current_text_node = node.at_css('w|t')
144
179
 
145
180
  next if style_html != previous_style_html
146
181
  next if current_text_node.nil? || previous_text_node.nil?
147
182
 
148
- previous_text_node.content = previous_text_node.text + run_node.text
149
- run_node.unlink
183
+ previous_text_node.content = previous_text_node.text + current_text_node.text
184
+ node.unlink
150
185
  end
151
186
  end
152
187
 
188
+ def filtered_children(node, selector = BLOCK_CHILDREN_ELEMENTS)
189
+ Nokogiri::XML::NodeSet.new(node.document, node.children.filter(selector))
190
+ end
191
+
153
192
  def unzip_read(zip, zip_path)
154
193
  file = zip.find_entry(zip_path)
155
194
  contents = ""
@@ -184,7 +184,8 @@ module LMDocstache
184
184
  end
185
185
 
186
186
  def has_skippable_variable?(text)
187
- return true if hide_custom_tags.find { |(pattern, value)| text =~ pattern }
187
+ return true if hide_custom_tags.find { |(pattern, _)| text =~ pattern }
188
+
188
189
  !!special_variable_replacements.find do |(pattern, value)|
189
190
  text =~ pattern && value == false
190
191
  end
@@ -1,3 +1,3 @@
1
1
  module LMDocstache
2
- VERSION = "3.0.4"
2
+ VERSION = "3.0.5"
3
3
  end
@@ -1,5 +1,4 @@
1
1
  require 'spec_helper'
2
- require 'securerandom'
3
2
  require 'active_support/core_ext/object/blank.rb'
4
3
 
5
4
  module LMDocstache
@@ -63,7 +62,7 @@ describe 'integration test', integration: true do
63
62
  it 'fixes nested xml errors breaking tags' do
64
63
  expect { document.fix_errors }.to change {
65
64
  document.send(:problem_paragraphs).size
66
- }.from(6).to(1)
65
+ }.from(7).to(1)
67
66
 
68
67
  expect(document.send(:problem_paragraphs).first.text).to eq(
69
68
  '{{TAG123-\\-//WITH WE👻IRD CHARS}}'
@@ -71,7 +70,7 @@ describe 'integration test', integration: true do
71
70
  end
72
71
 
73
72
  it 'has the expected amount of usable tags' do
74
- expect(document.usable_tags.count).to eq(43)
73
+ expect(document.usable_tags.count).to eq(21)
75
74
  end
76
75
 
77
76
  it 'has the expected amount of usable roles tags' do
@@ -80,7 +79,7 @@ describe 'integration test', integration: true do
80
79
  end
81
80
 
82
81
  it 'has the expected amount of unique tag names' do
83
- expect(document.usable_tag_names.count).to eq(19)
82
+ expect(document.usable_tag_names.count).to eq(14)
84
83
  end
85
84
 
86
85
  it 'renders file using data' do
@@ -140,30 +139,5 @@ describe 'integration test', integration: true do
140
139
  expect(output).to include('<w:t xml:space="preserve">Test Multiple text in the same line </w:t>')
141
140
  end
142
141
  end
143
-
144
- context "yoooo" do
145
- let(:input_file) { "#{base_path}/multi_o.docx" }
146
- let(:render_options) {
147
- {
148
- special_variable_replacements: { "(date|sig|sigfirm|text|check|initial|initials)\\|(req|noreq)\\|(.+?)" => false }.freeze,
149
- hide_custom_tags: ['(?:sig|sigfirm|date|check|text|initial)\|(?:req|noreq)\|.+?']
150
- }
151
- }
152
- let(:document) { LMDocstache::Document.new(input_file) }
153
-
154
- it 'should have content replacement aligned with hide custom tags' do
155
- doc = document
156
- doc.fix_errors
157
- new_file_path = "#{Time.now.to_i}-#{SecureRandom.uuid}.docx"
158
- n = doc.render_file(new_file_path, { 'full_name' => 'fred document01' }, render_options)
159
- noko = doc.render_xml({ 'full_name' => 'fred document01' }, render_options)
160
- output = noko['word/document.xml'].to_xml
161
- #puts output
162
- #doc.render_file(new_file_path, { 'full_name' => 'fred document01' }, render_options)
163
- #noko = doc.render_xml({ 'full_name' => 'fred document01' }, render_options)
164
- #output = noko['word/document.xml'].to_xml
165
- #puts output
166
- end
167
- end
168
142
  end
169
143
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lm_docstache
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.4
4
+ version: 3.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Roey Chasman
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2021-05-14 00:00:00.000000000 Z
15
+ date: 2021-06-04 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: nokogiri