lm_docstache 3.0.4 → 3.0.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ce8fea2c12829636bd22622e1c022cf2ead4ec09997b7e13322f2b94b4261654
4
- data.tar.gz: a25d02153cb1a53bf74111dc59710d83a133a821c4632607c60e06531b28b4ad
3
+ metadata.gz: b340972dc8dbd8f4f0e2bbdd8b385468d626d9f5f4ea3d9090cd3c96cba5ab05
4
+ data.tar.gz: f901539b68bff0bf5bb736aaa3c4636ade982fe23adfe7ae70ba41a926186c9a
5
5
  SHA512:
6
- metadata.gz: 99bcdac9eea8b1d62e0733b692c600be5e8dcb88710f58d22299b6daa7eabab5f5b01eeb83e61668709655869f6580e918ef38d11bb5ae781db4462b38abba65
7
- data.tar.gz: ea96603b65d984edfeeb9aa9f9b760c9665af2c5df163e4c3dae076b24ebc10fccb8e1bdee1cc6e01761496a4b89b7454e80965d6a24e96eb67b53b17e46be84
6
+ metadata.gz: 49273e8a8c7133cbf4ef02f0c6bb66b64c19408e75a010cbce313557bcf22407b493efe57e66d760bf9327d68b32a2008981937780d2ce0cb2c047ec0a1c9fae
7
+ data.tar.gz: bf8a47eea31424c0f2a20c35063c73fd93353d9fd315f028b18591bb1cd3f4d3d52c83ae668713bc74d4618fd95dcc33006523aefb4fc54a1430dfaf38017621
data/CHANGELOG.md CHANGED
@@ -1,5 +1,48 @@
1
1
  # Changelog
2
2
 
3
+ ## 3.0.9
4
+
5
+ #### Bug fixes
6
+
7
+ * Text nodes merged in paragraphs with problems through `fix_errors` private
8
+ method now have their "xml:space" attribute preserved.
9
+
10
+ ## 3.0.8
11
+
12
+ #### Bug fixes
13
+
14
+ * Fix a bug on `usable_tags` method, so it now properly and expectedly
15
+ includes conditional tag names that have their opening tag markup as the sole
16
+ content of paragraphs (which represents conditional blocks where both
17
+ opening and closing tags are in separate paragraphs surrounding one or more
18
+ paragraphs as its conditional block content).
19
+
20
+ ## 3.0.7
21
+
22
+ #### Bug fixes
23
+
24
+ * Fix a bug on `usable_tag_names` method, so it now properly and expectedly
25
+ includes conditional tag names as well, as before.
26
+
27
+ ## 3.0.6
28
+
29
+ #### Bug fixes
30
+
31
+ * Fix bug on `LMDocstache::Docstache#unusable_tags` method, where `nil` could be
32
+ passed to `broken_tags.delete_at` call.
33
+
34
+ ## 3.0.5
35
+
36
+ #### Bug fixes and improvements
37
+
38
+ * Improve the way broken tags are detected, making the algorithm wider in terms
39
+ of detecting broken tags, especially if the broken tag is the opening part of
40
+ conditional tag blocks (which wasn't being detected before these improvements).
41
+ * Improve the way the paragraphs with "unusable" tags are traversed and have
42
+ their same-style texts merged (hence the "unusable" tags becoming usable). So,
43
+ from now on, `w:hyperlink` elements, for instance, are properly processed as
44
+ well.
45
+
3
46
  ## 3.0.4
4
47
  * Allow replacement `data` argument to be an `Array`. This feature allows replacing blocks
5
48
  in sequential order, following the order of the matching blocks.
data/lib/lm_docstache.rb CHANGED
@@ -1,9 +1,9 @@
1
1
  require 'nokogiri'
2
2
  require 'zip'
3
3
  require "lm_docstache/version"
4
+ require "lm_docstache/parser"
4
5
  require "lm_docstache/document"
5
6
  require 'lm_docstache/hide_custom_tags'
6
- require "lm_docstache/parser"
7
7
  require "lm_docstache/condition"
8
8
  require "lm_docstache/conditional_block"
9
9
  require "lm_docstache/renderer"
@@ -1,7 +1,12 @@
1
1
  module LMDocstache
2
2
  class Document
3
- TAGS_REGEXP = /{{.+?}}/
3
+ WHOLE_BLOCK_START_REGEX = /^#{Parser::BLOCK_START_PATTERN}$/
4
+ GENERAL_TAG_REGEX = /\{\{[\/#^]?(.+?)(?:(\s((?:==|~=))\s?.+?))?\}\}/
4
5
  ROLES_REGEXP = /({{(sig|sigfirm|date|check|text|initial)\|(req|noreq)\|(.+?)}})/
6
+ BLOCK_CHILDREN_ELEMENTS = 'w|r,w|hyperlink,w|ins,w|del'
7
+ RUN_LIKE_ELEMENTS = 'w|r,w|ins'
8
+
9
+ attr_reader :document
5
10
 
6
11
  def initialize(*paths)
7
12
  raise ArgumentError if paths.empty?
@@ -34,38 +39,41 @@ module LMDocstache
34
39
 
35
40
  def tags
36
41
  @documents.values.flat_map do |document|
37
- document.text.strip.scan(TAGS_REGEXP)
42
+ document_text = document.text
43
+ extract_tag_names(document_text) + extract_tag_names(document_text, :full_block)
38
44
  end
39
45
  end
40
46
 
41
47
  def usable_tags
42
48
  @documents.values.reduce([]) do |tags, document|
43
49
  document.css('w|t').reduce(tags) do |document_tags, text_node|
44
- document_tags.push(*text_node.text.scan(TAGS_REGEXP))
50
+ text = text_node.text
51
+ document_tags.push(*extract_tag_names(text))
52
+ document_tags.push(*extract_tag_names(text, :start_block))
53
+ document_tags.push(*extract_tag_names(text, :full_block))
45
54
  end
46
55
  end
47
56
  end
48
57
 
49
58
  def usable_tag_names
50
- usable_tags.reject { |tag| tag =~ ROLES_REGEXP }.map do |tag|
51
- tag.scan(/\{\{[\/#^]?(.+?)(?:(\s((?:==|~=))\s?.+?))?\}\}/)
52
- $1
59
+ usable_tags.reduce([]) do |memo, tag|
60
+ next memo if !tag.is_a?(Regexp) && tag =~ ROLES_REGEXP
61
+
62
+ tag = unescape_escaped_start_block(tag.source) if tag.is_a?(Regexp)
63
+ memo << (tag.scan(GENERAL_TAG_REGEX) && $1)
53
64
  end.compact.uniq
54
65
  end
55
66
 
56
67
  def unusable_tags
57
- unusable_tags = tags
68
+ usable_tags.reduce(tags) do |broken_tags, usable_tag|
69
+ next broken_tags unless index = broken_tags.index(usable_tag)
58
70
 
59
- usable_tags.each do |usable_tag|
60
- index = unusable_tags.index(usable_tag)
61
- unusable_tags.delete_at(index) if index
71
+ broken_tags.delete_at(index) && broken_tags
62
72
  end
63
-
64
- unusable_tags
65
73
  end
66
74
 
67
75
  def fix_errors
68
- problem_paragraphs.each { |pg| flatten_paragraph(pg) if pg }
76
+ problem_paragraphs.each { |pg| flatten_text_blocks(pg) if pg }
69
77
  end
70
78
 
71
79
  def errors?
@@ -99,6 +107,28 @@ module LMDocstache
99
107
 
100
108
  private
101
109
 
110
+ def unescape_escaped_start_block(regex_source_string)
111
+ regex_source_string
112
+ .gsub('\\{', '{')
113
+ .gsub('\\#', '#')
114
+ .gsub('\\}', '}')
115
+ .gsub('\\^', '^')
116
+ .gsub('\\ ', ' ')
117
+ end
118
+
119
+ def extract_tag_names(text, tag_type = :variable)
120
+ text, regex, extractor =
121
+ if tag_type == :variable
122
+ [text, Parser::VARIABLE_MATCHER, ->(match) { "{{%s}}" % match }]
123
+ else
124
+ extractor = ->(match) { /#{Regexp.escape("{{%s%s %s %s}}" % match)}/ }
125
+ tag_type == :full_block ? [text, Parser::BLOCK_MATCHER, extractor] :
126
+ [text.strip, WHOLE_BLOCK_START_REGEX, extractor]
127
+ end
128
+
129
+ text.scan(regex).map(&extractor)
130
+ end
131
+
102
132
  def render_documents(data, text = nil, render_options = {})
103
133
  Hash[
104
134
  @documents.map do |(path, document)|
@@ -115,41 +145,51 @@ module LMDocstache
115
145
  def problem_paragraphs
116
146
  unusable_tags.flat_map do |tag|
117
147
  @documents.values.inject([]) do |tags, document|
118
- faulty_paragraphs = document
119
- .css('w|p')
120
- .select { |paragraph| paragraph.text =~ /#{Regexp.escape(tag)}/ }
148
+ faulty_paragraphs = document.css('w|p').select do |paragraph|
149
+ tag_regex = tag.is_a?(Regexp) ? tag : /#{Regexp.escape(tag)}/
150
+ paragraph.text =~ tag_regex
151
+ end
121
152
 
122
153
  tags + faulty_paragraphs
123
154
  end
124
155
  end
125
156
  end
126
157
 
127
- def flatten_paragraph(paragraph)
128
- return if (run_nodes = paragraph.css('w|r')).size < 2
158
+ def flatten_text_blocks(runs_wrapper)
159
+ return if (children = filtered_children(runs_wrapper)).size < 2
129
160
 
130
- while run_node = run_nodes.pop
131
- next if run_nodes.empty?
161
+ while node = children.pop
162
+ is_run_node = node.matches?(RUN_LIKE_ELEMENTS)
163
+ previous_node = children.last
132
164
 
133
- style_node = run_node.at_css('w|rPr')
165
+ if !is_run_node && filtered_children(node, RUN_LIKE_ELEMENTS).any?
166
+ next flatten_text_blocks(node)
167
+ end
168
+ next if !is_run_node || children.empty? || !previous_node.matches?(RUN_LIKE_ELEMENTS)
169
+ next if node.at_css('w|tab') || previous_node.at_css('w|tab')
170
+
171
+ style_node = node.at_css('w|rPr')
134
172
  style_html = style_node ? style_node.inner_html : ''
135
- previous_run_node = run_nodes.last
136
- previous_style_node = previous_run_node.at_css('w|rPr')
173
+ previous_style_node = previous_node.at_css('w|rPr')
137
174
  previous_style_html = previous_style_node ? previous_style_node.inner_html : ''
138
- previous_text_node = previous_run_node.at_css('w|t')
139
- current_text_node = run_node.at_css('w|t')
140
-
141
- # avoid to merge blocks with tabs
142
- next if run_node.at_css('w|tab')
143
- next if previous_run_node.at_css('w|tab')
175
+ previous_text_node = previous_node.at_css('w|t')
176
+ current_text_node = node.at_css('w|t')
177
+ whitespace_attr = current_text_node['xml:space']
144
178
 
145
179
  next if style_html != previous_style_html
146
180
  next if current_text_node.nil? || previous_text_node.nil?
147
181
 
148
- previous_text_node.content = previous_text_node.text + run_node.text
149
- run_node.unlink
182
+ previous_text_node['xml:space'] = whitespace_attr if whitespace_attr
183
+ previous_text_node.content = previous_text_node.text + current_text_node.text
184
+
185
+ node.unlink
150
186
  end
151
187
  end
152
188
 
189
+ def filtered_children(node, selector = BLOCK_CHILDREN_ELEMENTS)
190
+ Nokogiri::XML::NodeSet.new(node.document, node.children.filter(selector))
191
+ end
192
+
153
193
  def unzip_read(zip, zip_path)
154
194
  file = zip.find_entry(zip_path)
155
195
  contents = ""
@@ -184,7 +184,8 @@ module LMDocstache
184
184
  end
185
185
 
186
186
  def has_skippable_variable?(text)
187
- return true if hide_custom_tags.find { |(pattern, value)| text =~ pattern }
187
+ return true if hide_custom_tags.find { |(pattern, _)| text =~ pattern }
188
+
188
189
  !!special_variable_replacements.find do |(pattern, value)|
189
190
  text =~ pattern && value == false
190
191
  end
@@ -1,3 +1,3 @@
1
1
  module LMDocstache
2
- VERSION = "3.0.4"
2
+ VERSION = "3.0.9"
3
3
  end
@@ -1,5 +1,4 @@
1
1
  require 'spec_helper'
2
- require 'securerandom'
3
2
  require 'active_support/core_ext/object/blank.rb'
4
3
 
5
4
  module LMDocstache
@@ -63,7 +62,7 @@ describe 'integration test', integration: true do
63
62
  it 'fixes nested xml errors breaking tags' do
64
63
  expect { document.fix_errors }.to change {
65
64
  document.send(:problem_paragraphs).size
66
- }.from(6).to(1)
65
+ }.from(10).to(1)
67
66
 
68
67
  expect(document.send(:problem_paragraphs).first.text).to eq(
69
68
  '{{TAG123-\\-//WITH WE👻IRD CHARS}}'
@@ -71,7 +70,18 @@ describe 'integration test', integration: true do
71
70
  end
72
71
 
73
72
  it 'has the expected amount of usable tags' do
74
- expect(document.usable_tags.count).to eq(43)
73
+ expect { document.fix_errors }.to change {
74
+ document.usable_tags.count
75
+ }.from(29).to(37)
76
+ end
77
+
78
+ it 'keeps "xml:space" attribute when fixing errors' do
79
+ document.fix_errors
80
+
81
+ text_node = document.document.css('w|p').last
82
+ .css('w|t').find { |node| node.text.include?('that occurred on') }
83
+
84
+ expect(text_node['xml:space']).to eq('preserve')
75
85
  end
76
86
 
77
87
  it 'has the expected amount of usable roles tags' do
@@ -80,13 +90,14 @@ describe 'integration test', integration: true do
80
90
  end
81
91
 
82
92
  it 'has the expected amount of unique tag names' do
83
- expect(document.usable_tag_names.count).to eq(19)
93
+ expect(document.usable_tag_names.count).to eq(20)
84
94
  end
85
95
 
86
96
  it 'renders file using data' do
87
97
  document.render_file(output_file, data)
88
98
  end
89
99
  end
100
+
90
101
  context "testing hide custom tags" do
91
102
  before do
92
103
  FileUtils.rm_rf(output_dir) if File.exist?(output_dir)
@@ -140,30 +151,5 @@ describe 'integration test', integration: true do
140
151
  expect(output).to include('<w:t xml:space="preserve">Test Multiple text in the same line </w:t>')
141
152
  end
142
153
  end
143
-
144
- context "yoooo" do
145
- let(:input_file) { "#{base_path}/multi_o.docx" }
146
- let(:render_options) {
147
- {
148
- special_variable_replacements: { "(date|sig|sigfirm|text|check|initial|initials)\\|(req|noreq)\\|(.+?)" => false }.freeze,
149
- hide_custom_tags: ['(?:sig|sigfirm|date|check|text|initial)\|(?:req|noreq)\|.+?']
150
- }
151
- }
152
- let(:document) { LMDocstache::Document.new(input_file) }
153
-
154
- it 'should have content replacement aligned with hide custom tags' do
155
- doc = document
156
- doc.fix_errors
157
- new_file_path = "#{Time.now.to_i}-#{SecureRandom.uuid}.docx"
158
- n = doc.render_file(new_file_path, { 'full_name' => 'fred document01' }, render_options)
159
- noko = doc.render_xml({ 'full_name' => 'fred document01' }, render_options)
160
- output = noko['word/document.xml'].to_xml
161
- #puts output
162
- #doc.render_file(new_file_path, { 'full_name' => 'fred document01' }, render_options)
163
- #noko = doc.render_xml({ 'full_name' => 'fred document01' }, render_options)
164
- #output = noko['word/document.xml'].to_xml
165
- #puts output
166
- end
167
- end
168
154
  end
169
155
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lm_docstache
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.4
4
+ version: 3.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Roey Chasman
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2021-05-14 00:00:00.000000000 Z
15
+ date: 2021-07-08 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: nokogiri