lm_docstache 3.0.4 → 3.0.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +43 -0
- data/lib/lm_docstache.rb +1 -1
- data/lib/lm_docstache/document.rb +71 -31
- data/lib/lm_docstache/parser.rb +2 -1
- data/lib/lm_docstache/version.rb +1 -1
- data/spec/example_input/ExampleTemplate.docx +0 -0
- data/spec/integration_spec.rb +15 -29
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b340972dc8dbd8f4f0e2bbdd8b385468d626d9f5f4ea3d9090cd3c96cba5ab05
|
4
|
+
data.tar.gz: f901539b68bff0bf5bb736aaa3c4636ade982fe23adfe7ae70ba41a926186c9a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 49273e8a8c7133cbf4ef02f0c6bb66b64c19408e75a010cbce313557bcf22407b493efe57e66d760bf9327d68b32a2008981937780d2ce0cb2c047ec0a1c9fae
|
7
|
+
data.tar.gz: bf8a47eea31424c0f2a20c35063c73fd93353d9fd315f028b18591bb1cd3f4d3d52c83ae668713bc74d4618fd95dcc33006523aefb4fc54a1430dfaf38017621
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,48 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 3.0.9
|
4
|
+
|
5
|
+
#### Bug fixes
|
6
|
+
|
7
|
+
* Text nodes merged in paragraphs with problems through `fix_errors` private
|
8
|
+
method now have their "xml:space" attribute preserved.
|
9
|
+
|
10
|
+
## 3.0.8
|
11
|
+
|
12
|
+
#### Bug fixes
|
13
|
+
|
14
|
+
* Fix a bug on `usable_tags` method, so it now properly and expectedly
|
15
|
+
includes conditional tag names that have their opening tag markup as the sole
|
16
|
+
content of paragraphs (which represents conditional blocks where both
|
17
|
+
opening and closing tags are in separate paragraphs surrounding one or more
|
18
|
+
paragraphs as its conditional block content).
|
19
|
+
|
20
|
+
## 3.0.7
|
21
|
+
|
22
|
+
#### Bug fixes
|
23
|
+
|
24
|
+
* Fix a bug on `usable_tag_names` method, so it now properly and expectedly
|
25
|
+
includes conditional tag names as well, as before.
|
26
|
+
|
27
|
+
## 3.0.6
|
28
|
+
|
29
|
+
#### Bug fixes
|
30
|
+
|
31
|
+
* Fix bug on `LMDocstache::Document#unusable_tags` method, where `nil` could be
|
32
|
+
passed to `broken_tags.delete_at` call.
|
33
|
+
|
34
|
+
## 3.0.5
|
35
|
+
|
36
|
+
#### Bug fixes and improvements
|
37
|
+
|
38
|
+
* Improve the way broken tags are detected, making the algorithm broader in terms of
|
39
|
+
detecting broken tags, especially if the broken tag is the opening part of
|
40
|
+
conditional tag blocks (which was not being detected before these improvements).
|
41
|
+
* Improve the way the paragraphs with "unusable" tags are traversed and have
|
42
|
+
their same-style texts merged (hence the "unusable" tags becoming usable). So,
|
43
|
+
from now on, `w:hyperlink` elements, for instance, are properly processed as
|
44
|
+
well.
|
45
|
+
|
3
46
|
## 3.0.4
|
4
47
|
* Allow replacement `data` argument to be an `Array`. This feature allows replacing blocks
|
5
48
|
in sequential order, following the order of the matching blocks.
|
data/lib/lm_docstache.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'zip'
|
3
3
|
require "lm_docstache/version"
|
4
|
+
require "lm_docstache/parser"
|
4
5
|
require "lm_docstache/document"
|
5
6
|
require 'lm_docstache/hide_custom_tags'
|
6
|
-
require "lm_docstache/parser"
|
7
7
|
require "lm_docstache/condition"
|
8
8
|
require "lm_docstache/conditional_block"
|
9
9
|
require "lm_docstache/renderer"
|
@@ -1,7 +1,12 @@
|
|
1
1
|
module LMDocstache
|
2
2
|
class Document
|
3
|
-
|
3
|
+
WHOLE_BLOCK_START_REGEX = /^#{Parser::BLOCK_START_PATTERN}$/
|
4
|
+
GENERAL_TAG_REGEX = /\{\{[\/#^]?(.+?)(?:(\s((?:==|~=))\s?.+?))?\}\}/
|
4
5
|
ROLES_REGEXP = /({{(sig|sigfirm|date|check|text|initial)\|(req|noreq)\|(.+?)}})/
|
6
|
+
BLOCK_CHILDREN_ELEMENTS = 'w|r,w|hyperlink,w|ins,w|del'
|
7
|
+
RUN_LIKE_ELEMENTS = 'w|r,w|ins'
|
8
|
+
|
9
|
+
attr_reader :document
|
5
10
|
|
6
11
|
def initialize(*paths)
|
7
12
|
raise ArgumentError if paths.empty?
|
@@ -34,38 +39,41 @@ module LMDocstache
|
|
34
39
|
|
35
40
|
def tags
|
36
41
|
@documents.values.flat_map do |document|
|
37
|
-
document.text
|
42
|
+
document_text = document.text
|
43
|
+
extract_tag_names(document_text) + extract_tag_names(document_text, :full_block)
|
38
44
|
end
|
39
45
|
end
|
40
46
|
|
41
47
|
def usable_tags
|
42
48
|
@documents.values.reduce([]) do |tags, document|
|
43
49
|
document.css('w|t').reduce(tags) do |document_tags, text_node|
|
44
|
-
|
50
|
+
text = text_node.text
|
51
|
+
document_tags.push(*extract_tag_names(text))
|
52
|
+
document_tags.push(*extract_tag_names(text, :start_block))
|
53
|
+
document_tags.push(*extract_tag_names(text, :full_block))
|
45
54
|
end
|
46
55
|
end
|
47
56
|
end
|
48
57
|
|
49
58
|
def usable_tag_names
|
50
|
-
usable_tags.
|
51
|
-
tag.
|
52
|
-
|
59
|
+
usable_tags.reduce([]) do |memo, tag|
|
60
|
+
next memo if !tag.is_a?(Regexp) && tag =~ ROLES_REGEXP
|
61
|
+
|
62
|
+
tag = unescape_escaped_start_block(tag.source) if tag.is_a?(Regexp)
|
63
|
+
memo << (tag.scan(GENERAL_TAG_REGEX) && $1)
|
53
64
|
end.compact.uniq
|
54
65
|
end
|
55
66
|
|
56
67
|
def unusable_tags
|
57
|
-
|
68
|
+
usable_tags.reduce(tags) do |broken_tags, usable_tag|
|
69
|
+
next broken_tags unless index = broken_tags.index(usable_tag)
|
58
70
|
|
59
|
-
|
60
|
-
index = unusable_tags.index(usable_tag)
|
61
|
-
unusable_tags.delete_at(index) if index
|
71
|
+
broken_tags.delete_at(index) && broken_tags
|
62
72
|
end
|
63
|
-
|
64
|
-
unusable_tags
|
65
73
|
end
|
66
74
|
|
67
75
|
def fix_errors
|
68
|
-
problem_paragraphs.each { |pg|
|
76
|
+
problem_paragraphs.each { |pg| flatten_text_blocks(pg) if pg }
|
69
77
|
end
|
70
78
|
|
71
79
|
def errors?
|
@@ -99,6 +107,28 @@ module LMDocstache
|
|
99
107
|
|
100
108
|
private
|
101
109
|
|
110
|
+
def unescape_escaped_start_block(regex_source_string)
|
111
|
+
regex_source_string
|
112
|
+
.gsub('\\{', '{')
|
113
|
+
.gsub('\\#', '#')
|
114
|
+
.gsub('\\}', '}')
|
115
|
+
.gsub('\\^', '^')
|
116
|
+
.gsub('\\ ', ' ')
|
117
|
+
end
|
118
|
+
|
119
|
+
def extract_tag_names(text, tag_type = :variable)
|
120
|
+
text, regex, extractor =
|
121
|
+
if tag_type == :variable
|
122
|
+
[text, Parser::VARIABLE_MATCHER, ->(match) { "{{%s}}" % match }]
|
123
|
+
else
|
124
|
+
extractor = ->(match) { /#{Regexp.escape("{{%s%s %s %s}}" % match)}/ }
|
125
|
+
tag_type == :full_block ? [text, Parser::BLOCK_MATCHER, extractor] :
|
126
|
+
[text.strip, WHOLE_BLOCK_START_REGEX, extractor]
|
127
|
+
end
|
128
|
+
|
129
|
+
text.scan(regex).map(&extractor)
|
130
|
+
end
|
131
|
+
|
102
132
|
def render_documents(data, text = nil, render_options = {})
|
103
133
|
Hash[
|
104
134
|
@documents.map do |(path, document)|
|
@@ -115,41 +145,51 @@ module LMDocstache
|
|
115
145
|
def problem_paragraphs
|
116
146
|
unusable_tags.flat_map do |tag|
|
117
147
|
@documents.values.inject([]) do |tags, document|
|
118
|
-
faulty_paragraphs = document
|
119
|
-
.
|
120
|
-
|
148
|
+
faulty_paragraphs = document.css('w|p').select do |paragraph|
|
149
|
+
tag_regex = tag.is_a?(Regexp) ? tag : /#{Regexp.escape(tag)}/
|
150
|
+
paragraph.text =~ tag_regex
|
151
|
+
end
|
121
152
|
|
122
153
|
tags + faulty_paragraphs
|
123
154
|
end
|
124
155
|
end
|
125
156
|
end
|
126
157
|
|
127
|
-
def
|
128
|
-
return if (
|
158
|
+
def flatten_text_blocks(runs_wrapper)
|
159
|
+
return if (children = filtered_children(runs_wrapper)).size < 2
|
129
160
|
|
130
|
-
while
|
131
|
-
|
161
|
+
while node = children.pop
|
162
|
+
is_run_node = node.matches?(RUN_LIKE_ELEMENTS)
|
163
|
+
previous_node = children.last
|
132
164
|
|
133
|
-
|
165
|
+
if !is_run_node && filtered_children(node, RUN_LIKE_ELEMENTS).any?
|
166
|
+
next flatten_text_blocks(node)
|
167
|
+
end
|
168
|
+
next if !is_run_node || children.empty? || !previous_node.matches?(RUN_LIKE_ELEMENTS)
|
169
|
+
next if node.at_css('w|tab') || previous_node.at_css('w|tab')
|
170
|
+
|
171
|
+
style_node = node.at_css('w|rPr')
|
134
172
|
style_html = style_node ? style_node.inner_html : ''
|
135
|
-
|
136
|
-
previous_style_node = previous_run_node.at_css('w|rPr')
|
173
|
+
previous_style_node = previous_node.at_css('w|rPr')
|
137
174
|
previous_style_html = previous_style_node ? previous_style_node.inner_html : ''
|
138
|
-
previous_text_node =
|
139
|
-
current_text_node =
|
140
|
-
|
141
|
-
# avoid to merge blocks with tabs
|
142
|
-
next if run_node.at_css('w|tab')
|
143
|
-
next if previous_run_node.at_css('w|tab')
|
175
|
+
previous_text_node = previous_node.at_css('w|t')
|
176
|
+
current_text_node = node.at_css('w|t')
|
177
|
+
whitespace_attr = current_text_node['xml:space']
|
144
178
|
|
145
179
|
next if style_html != previous_style_html
|
146
180
|
next if current_text_node.nil? || previous_text_node.nil?
|
147
181
|
|
148
|
-
previous_text_node
|
149
|
-
|
182
|
+
previous_text_node['xml:space'] = whitespace_attr if whitespace_attr
|
183
|
+
previous_text_node.content = previous_text_node.text + current_text_node.text
|
184
|
+
|
185
|
+
node.unlink
|
150
186
|
end
|
151
187
|
end
|
152
188
|
|
189
|
+
def filtered_children(node, selector = BLOCK_CHILDREN_ELEMENTS)
|
190
|
+
Nokogiri::XML::NodeSet.new(node.document, node.children.filter(selector))
|
191
|
+
end
|
192
|
+
|
153
193
|
def unzip_read(zip, zip_path)
|
154
194
|
file = zip.find_entry(zip_path)
|
155
195
|
contents = ""
|
data/lib/lm_docstache/parser.rb
CHANGED
@@ -184,7 +184,8 @@ module LMDocstache
|
|
184
184
|
end
|
185
185
|
|
186
186
|
def has_skippable_variable?(text)
|
187
|
-
return true if hide_custom_tags.find { |(pattern,
|
187
|
+
return true if hide_custom_tags.find { |(pattern, _)| text =~ pattern }
|
188
|
+
|
188
189
|
!!special_variable_replacements.find do |(pattern, value)|
|
189
190
|
text =~ pattern && value == false
|
190
191
|
end
|
data/lib/lm_docstache/version.rb
CHANGED
Binary file
|
data/spec/integration_spec.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
-
require 'securerandom'
|
3
2
|
require 'active_support/core_ext/object/blank.rb'
|
4
3
|
|
5
4
|
module LMDocstache
|
@@ -63,7 +62,7 @@ describe 'integration test', integration: true do
|
|
63
62
|
it 'fixes nested xml errors breaking tags' do
|
64
63
|
expect { document.fix_errors }.to change {
|
65
64
|
document.send(:problem_paragraphs).size
|
66
|
-
}.from(
|
65
|
+
}.from(10).to(1)
|
67
66
|
|
68
67
|
expect(document.send(:problem_paragraphs).first.text).to eq(
|
69
68
|
'{{TAG123-\\-//WITH WE👻IRD CHARS}}'
|
@@ -71,7 +70,18 @@ describe 'integration test', integration: true do
|
|
71
70
|
end
|
72
71
|
|
73
72
|
it 'has the expected amount of usable tags' do
|
74
|
-
expect
|
73
|
+
expect { document.fix_errors }.to change {
|
74
|
+
document.usable_tags.count
|
75
|
+
}.from(29).to(37)
|
76
|
+
end
|
77
|
+
|
78
|
+
it 'keeps "xml:space" attribute when fixing errors' do
|
79
|
+
document.fix_errors
|
80
|
+
|
81
|
+
text_node = document.document.css('w|p').last
|
82
|
+
.css('w|t').find { |node| node.text.include?('that occurred on') }
|
83
|
+
|
84
|
+
expect(text_node['xml:space']).to eq('preserve')
|
75
85
|
end
|
76
86
|
|
77
87
|
it 'has the expected amount of usable roles tags' do
|
@@ -80,13 +90,14 @@ describe 'integration test', integration: true do
|
|
80
90
|
end
|
81
91
|
|
82
92
|
it 'has the expected amount of unique tag names' do
|
83
|
-
expect(document.usable_tag_names.count).to eq(
|
93
|
+
expect(document.usable_tag_names.count).to eq(20)
|
84
94
|
end
|
85
95
|
|
86
96
|
it 'renders file using data' do
|
87
97
|
document.render_file(output_file, data)
|
88
98
|
end
|
89
99
|
end
|
100
|
+
|
90
101
|
context "testing hide custom tags" do
|
91
102
|
before do
|
92
103
|
FileUtils.rm_rf(output_dir) if File.exist?(output_dir)
|
@@ -140,30 +151,5 @@ describe 'integration test', integration: true do
|
|
140
151
|
expect(output).to include('<w:t xml:space="preserve">Test Multiple text in the same line </w:t>')
|
141
152
|
end
|
142
153
|
end
|
143
|
-
|
144
|
-
context "yoooo" do
|
145
|
-
let(:input_file) { "#{base_path}/multi_o.docx" }
|
146
|
-
let(:render_options) {
|
147
|
-
{
|
148
|
-
special_variable_replacements: { "(date|sig|sigfirm|text|check|initial|initials)\\|(req|noreq)\\|(.+?)" => false }.freeze,
|
149
|
-
hide_custom_tags: ['(?:sig|sigfirm|date|check|text|initial)\|(?:req|noreq)\|.+?']
|
150
|
-
}
|
151
|
-
}
|
152
|
-
let(:document) { LMDocstache::Document.new(input_file) }
|
153
|
-
|
154
|
-
it 'should have content replacement aligned with hide custom tags' do
|
155
|
-
doc = document
|
156
|
-
doc.fix_errors
|
157
|
-
new_file_path = "#{Time.now.to_i}-#{SecureRandom.uuid}.docx"
|
158
|
-
n = doc.render_file(new_file_path, { 'full_name' => 'fred document01' }, render_options)
|
159
|
-
noko = doc.render_xml({ 'full_name' => 'fred document01' }, render_options)
|
160
|
-
output = noko['word/document.xml'].to_xml
|
161
|
-
#puts output
|
162
|
-
#doc.render_file(new_file_path, { 'full_name' => 'fred document01' }, render_options)
|
163
|
-
#noko = doc.render_xml({ 'full_name' => 'fred document01' }, render_options)
|
164
|
-
#output = noko['word/document.xml'].to_xml
|
165
|
-
#puts output
|
166
|
-
end
|
167
|
-
end
|
168
154
|
end
|
169
155
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lm_docstache
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.4
|
4
|
+
version: 3.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Roey Chasman
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date: 2021-
|
15
|
+
date: 2021-07-08 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: nokogiri
|