lm_docstache 3.0.4 → 3.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/lib/lm_docstache.rb +1 -1
- data/lib/lm_docstache/document.rb +70 -31
- data/lib/lm_docstache/parser.rb +2 -1
- data/lib/lm_docstache/version.rb +1 -1
- data/spec/example_input/ExampleTemplate.docx +0 -0
- data/spec/integration_spec.rb +3 -29
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cc1a3839b3cfabfd78b144d3f2862aa4a8bf1650bc4037220b2e5af4feaa4a31
|
4
|
+
data.tar.gz: d573c864a49f7e2dcc07122fc242664b597a588253408f065ad94cb0f2823f0a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7b0fb9ff483de6b2e4315206d1961f3ff847612519ed7ff11203f8ca9e7aabd35fc8a5f8730ff552c171082a0d29d2a1a126f86e89a3a117e3a10ab0a5fb5222
|
7
|
+
data.tar.gz: a8c510d591b10ee2d66e6c3ac85995ee9eadb8d91f2178c127e70c12b4184d840cfa725929d4d02b40e62b2e2f9f2c7d629bf65866164833e421a5fdbb5e5c7b
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,17 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 3.0.5
|
4
|
+
|
5
|
+
#### Bug fixes and improvements
|
6
|
+
|
7
|
+
* Improve the way broken tags are detected, making the algorithm wider in terms of
|
8
|
+
detecting broken tags, especially if the broken tag is the opening part of
|
9
|
+
conditional tag blocks (which wasn't being detected before these improvements).
|
10
|
+
* Improve the way the paragraphs with "unusable" tags are traversed and have
|
11
|
+
their same-style texts merged (hence the "unusable" tags becoming usable). So,
|
12
|
+
from now on, `w:hyperlink` elements, for instance, are properly processed as
|
13
|
+
well.
|
14
|
+
|
3
15
|
## 3.0.4
|
4
16
|
* Allow the replacement `data` argument to be an `Array`. This feature allows replacing blocks
|
5
17
|
sequentially, following the order in which the matching blocks appear.
|
data/lib/lm_docstache.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'zip'
|
3
3
|
require "lm_docstache/version"
|
4
|
+
require "lm_docstache/parser"
|
4
5
|
require "lm_docstache/document"
|
5
6
|
require 'lm_docstache/hide_custom_tags'
|
6
|
-
require "lm_docstache/parser"
|
7
7
|
require "lm_docstache/condition"
|
8
8
|
require "lm_docstache/conditional_block"
|
9
9
|
require "lm_docstache/renderer"
|
@@ -1,7 +1,10 @@
|
|
1
1
|
module LMDocstache
|
2
2
|
class Document
|
3
|
-
|
3
|
+
WHOLE_BLOCK_START_REGEX = /^#{Parser::BLOCK_START_PATTERN}$/
|
4
|
+
GENERAL_TAG_REGEX = /\{\{[\/#^]?(.+?)(?:(\s((?:==|~=))\s?.+?))?\}\}/
|
4
5
|
ROLES_REGEXP = /({{(sig|sigfirm|date|check|text|initial)\|(req|noreq)\|(.+?)}})/
|
6
|
+
BLOCK_CHILDREN_ELEMENTS = 'w|r,w|hyperlink,w|ins,w|del'
|
7
|
+
RUN_LIKE_ELEMENTS = 'w|r,w|ins'
|
5
8
|
|
6
9
|
def initialize(*paths)
|
7
10
|
raise ArgumentError if paths.empty?
|
@@ -34,38 +37,48 @@ module LMDocstache
|
|
34
37
|
|
35
38
|
def tags
|
36
39
|
@documents.values.flat_map do |document|
|
37
|
-
document.text
|
40
|
+
document_text = document.text
|
41
|
+
extract_tag_names(document_text) + extract_tag_names(document_text, true)
|
38
42
|
end
|
39
43
|
end
|
40
44
|
|
41
45
|
def usable_tags
|
42
46
|
@documents.values.reduce([]) do |tags, document|
|
43
47
|
document.css('w|t').reduce(tags) do |document_tags, text_node|
|
44
|
-
|
48
|
+
text = text_node.text
|
49
|
+
document_tags.push(*extract_tag_names(text))
|
50
|
+
document_tags.push(*extract_tag_names(text, true))
|
45
51
|
end
|
46
52
|
end
|
47
53
|
end
|
48
54
|
|
49
55
|
def usable_tag_names
|
50
|
-
usable_tags.
|
51
|
-
tag.
|
52
|
-
|
56
|
+
usable_tags.reduce([]) do |memo, tag|
|
57
|
+
next memo if !tag.is_a?(Regexp) && tag =~ ROLES_REGEXP
|
58
|
+
|
59
|
+
tag = tag.source if tag.is_a?(Regexp)
|
60
|
+
memo << (tag.scan(GENERAL_TAG_REGEX) && $1)
|
53
61
|
end.compact.uniq
|
54
62
|
end
|
55
63
|
|
56
64
|
def unusable_tags
|
57
|
-
|
65
|
+
conditional_start_tags = text_nodes_containing_only_starting_conditionals.map(&:text)
|
66
|
+
|
67
|
+
usable_tags.reduce(tags) do |broken_tags, usable_tag|
|
68
|
+
broken_tags.delete_at(broken_tags.index(usable_tag)) && broken_tags
|
69
|
+
end.reject do |broken_tag|
|
70
|
+
operator = broken_tag.is_a?(Regexp) ? :=~ : :==
|
71
|
+
start_tags_index = conditional_start_tags.find_index do |start_tag|
|
72
|
+
broken_tag.send(operator, start_tag)
|
73
|
+
end
|
58
74
|
|
59
|
-
|
60
|
-
|
61
|
-
unusable_tags.delete_at(index) if index
|
75
|
+
conditional_start_tags.delete_at(start_tags_index) if start_tags_index
|
76
|
+
!!start_tags_index
|
62
77
|
end
|
63
|
-
|
64
|
-
unusable_tags
|
65
78
|
end
|
66
79
|
|
67
80
|
def fix_errors
|
68
|
-
problem_paragraphs.each { |pg|
|
81
|
+
problem_paragraphs.each { |pg| flatten_text_blocks(pg) if pg }
|
69
82
|
end
|
70
83
|
|
71
84
|
def errors?
|
@@ -99,6 +112,25 @@ module LMDocstache
|
|
99
112
|
|
100
113
|
private
|
101
114
|
|
115
|
+
def text_nodes_containing_only_starting_conditionals
|
116
|
+
@documents.values.flat_map do |document|
|
117
|
+
document.css('w|t').select do |paragraph|
|
118
|
+
paragraph.text =~ WHOLE_BLOCK_START_REGEX
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
def extract_tag_names(text, conditional_tag = false)
|
124
|
+
if conditional_tag
|
125
|
+
text.scan(Parser::BLOCK_MATCHER).map do |match|
|
126
|
+
start_block_tag = "{{#{match[0]}#{match[1]} #{match[2]} #{match[3]}}}"
|
127
|
+
/#{Regexp.escape(start_block_tag)}/
|
128
|
+
end
|
129
|
+
else
|
130
|
+
text.scan(Parser::VARIABLE_MATCHER).map { |match| "{{#{match[0]}}}" }
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
102
134
|
def render_documents(data, text = nil, render_options = {})
|
103
135
|
Hash[
|
104
136
|
@documents.map do |(path, document)|
|
@@ -115,41 +147,48 @@ module LMDocstache
|
|
115
147
|
def problem_paragraphs
|
116
148
|
unusable_tags.flat_map do |tag|
|
117
149
|
@documents.values.inject([]) do |tags, document|
|
118
|
-
faulty_paragraphs = document
|
119
|
-
.
|
120
|
-
|
150
|
+
faulty_paragraphs = document.css('w|p').select do |paragraph|
|
151
|
+
tag_regex = tag.is_a?(Regexp) ? tag : /#{Regexp.escape(tag)}/
|
152
|
+
paragraph.text =~ tag_regex
|
153
|
+
end
|
121
154
|
|
122
155
|
tags + faulty_paragraphs
|
123
156
|
end
|
124
157
|
end
|
125
158
|
end
|
126
159
|
|
127
|
-
def
|
128
|
-
return if (
|
160
|
+
def flatten_text_blocks(runs_wrapper)
|
161
|
+
return if (children = filtered_children(runs_wrapper)).size < 2
|
162
|
+
|
163
|
+
while node = children.pop
|
164
|
+
is_run_node = node.matches?(RUN_LIKE_ELEMENTS)
|
165
|
+
previous_node = children.last
|
129
166
|
|
130
|
-
|
131
|
-
|
167
|
+
if !is_run_node && filtered_children(node, RUN_LIKE_ELEMENTS).any?
|
168
|
+
next flatten_text_blocks(node)
|
169
|
+
end
|
170
|
+
next if !is_run_node || children.empty? || !previous_node.matches?(RUN_LIKE_ELEMENTS)
|
171
|
+
next if node.at_css('w|tab') || previous_node.at_css('w|tab')
|
132
172
|
|
133
|
-
style_node =
|
173
|
+
style_node = node.at_css('w|rPr')
|
134
174
|
style_html = style_node ? style_node.inner_html : ''
|
135
|
-
|
136
|
-
previous_style_node = previous_run_node.at_css('w|rPr')
|
175
|
+
previous_style_node = previous_node.at_css('w|rPr')
|
137
176
|
previous_style_html = previous_style_node ? previous_style_node.inner_html : ''
|
138
|
-
previous_text_node =
|
139
|
-
current_text_node =
|
140
|
-
|
141
|
-
# avoid to merge blocks with tabs
|
142
|
-
next if run_node.at_css('w|tab')
|
143
|
-
next if previous_run_node.at_css('w|tab')
|
177
|
+
previous_text_node = previous_node.at_css('w|t')
|
178
|
+
current_text_node = node.at_css('w|t')
|
144
179
|
|
145
180
|
next if style_html != previous_style_html
|
146
181
|
next if current_text_node.nil? || previous_text_node.nil?
|
147
182
|
|
148
|
-
previous_text_node.content = previous_text_node.text +
|
149
|
-
|
183
|
+
previous_text_node.content = previous_text_node.text + current_text_node.text
|
184
|
+
node.unlink
|
150
185
|
end
|
151
186
|
end
|
152
187
|
|
188
|
+
def filtered_children(node, selector = BLOCK_CHILDREN_ELEMENTS)
|
189
|
+
Nokogiri::XML::NodeSet.new(node.document, node.children.filter(selector))
|
190
|
+
end
|
191
|
+
|
153
192
|
def unzip_read(zip, zip_path)
|
154
193
|
file = zip.find_entry(zip_path)
|
155
194
|
contents = ""
|
data/lib/lm_docstache/parser.rb
CHANGED
@@ -184,7 +184,8 @@ module LMDocstache
|
|
184
184
|
end
|
185
185
|
|
186
186
|
def has_skippable_variable?(text)
|
187
|
-
return true if hide_custom_tags.find { |(pattern,
|
187
|
+
return true if hide_custom_tags.find { |(pattern, _)| text =~ pattern }
|
188
|
+
|
188
189
|
!!special_variable_replacements.find do |(pattern, value)|
|
189
190
|
text =~ pattern && value == false
|
190
191
|
end
|
data/lib/lm_docstache/version.rb
CHANGED
Binary file
|
data/spec/integration_spec.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
-
require 'securerandom'
|
3
2
|
require 'active_support/core_ext/object/blank.rb'
|
4
3
|
|
5
4
|
module LMDocstache
|
@@ -63,7 +62,7 @@ describe 'integration test', integration: true do
|
|
63
62
|
it 'fixes nested xml errors breaking tags' do
|
64
63
|
expect { document.fix_errors }.to change {
|
65
64
|
document.send(:problem_paragraphs).size
|
66
|
-
}.from(
|
65
|
+
}.from(7).to(1)
|
67
66
|
|
68
67
|
expect(document.send(:problem_paragraphs).first.text).to eq(
|
69
68
|
'{{TAG123-\\-//WITH WE👻IRD CHARS}}'
|
@@ -71,7 +70,7 @@ describe 'integration test', integration: true do
|
|
71
70
|
end
|
72
71
|
|
73
72
|
it 'has the expected amount of usable tags' do
|
74
|
-
expect(document.usable_tags.count).to eq(
|
73
|
+
expect(document.usable_tags.count).to eq(21)
|
75
74
|
end
|
76
75
|
|
77
76
|
it 'has the expected amount of usable roles tags' do
|
@@ -80,7 +79,7 @@ describe 'integration test', integration: true do
|
|
80
79
|
end
|
81
80
|
|
82
81
|
it 'has the expected amount of unique tag names' do
|
83
|
-
expect(document.usable_tag_names.count).to eq(
|
82
|
+
expect(document.usable_tag_names.count).to eq(14)
|
84
83
|
end
|
85
84
|
|
86
85
|
it 'renders file using data' do
|
@@ -140,30 +139,5 @@ describe 'integration test', integration: true do
|
|
140
139
|
expect(output).to include('<w:t xml:space="preserve">Test Multiple text in the same line </w:t>')
|
141
140
|
end
|
142
141
|
end
|
143
|
-
|
144
|
-
context "yoooo" do
|
145
|
-
let(:input_file) { "#{base_path}/multi_o.docx" }
|
146
|
-
let(:render_options) {
|
147
|
-
{
|
148
|
-
special_variable_replacements: { "(date|sig|sigfirm|text|check|initial|initials)\\|(req|noreq)\\|(.+?)" => false }.freeze,
|
149
|
-
hide_custom_tags: ['(?:sig|sigfirm|date|check|text|initial)\|(?:req|noreq)\|.+?']
|
150
|
-
}
|
151
|
-
}
|
152
|
-
let(:document) { LMDocstache::Document.new(input_file) }
|
153
|
-
|
154
|
-
it 'should have content replacement aligned with hide custom tags' do
|
155
|
-
doc = document
|
156
|
-
doc.fix_errors
|
157
|
-
new_file_path = "#{Time.now.to_i}-#{SecureRandom.uuid}.docx"
|
158
|
-
n = doc.render_file(new_file_path, { 'full_name' => 'fred document01' }, render_options)
|
159
|
-
noko = doc.render_xml({ 'full_name' => 'fred document01' }, render_options)
|
160
|
-
output = noko['word/document.xml'].to_xml
|
161
|
-
#puts output
|
162
|
-
#doc.render_file(new_file_path, { 'full_name' => 'fred document01' }, render_options)
|
163
|
-
#noko = doc.render_xml({ 'full_name' => 'fred document01' }, render_options)
|
164
|
-
#output = noko['word/document.xml'].to_xml
|
165
|
-
#puts output
|
166
|
-
end
|
167
|
-
end
|
168
142
|
end
|
169
143
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lm_docstache
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.
|
4
|
+
version: 3.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Roey Chasman
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date: 2021-
|
15
|
+
date: 2021-06-04 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: nokogiri
|