RubyGems - lm_docstache - Versions diffs - 3.0.4 → 3.0.9 - Mend

lm_docstache 3.0.4 → 3.0.9

Files changed (9) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +43 -0
data/lib/lm_docstache.rb +1 -1
data/lib/lm_docstache/document.rb +71 -31
data/lib/lm_docstache/parser.rb +2 -1
data/lib/lm_docstache/version.rb +1 -1
data/spec/example_input/ExampleTemplate.docx +0 -0
data/spec/integration_spec.rb +15 -29
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: ce8fea2c12829636bd22622e1c022cf2ead4ec09997b7e13322f2b94b4261654
-  data.tar.gz: a25d02153cb1a53bf74111dc59710d83a133a821c4632607c60e06531b28b4ad
+  metadata.gz: b340972dc8dbd8f4f0e2bbdd8b385468d626d9f5f4ea3d9090cd3c96cba5ab05
+  data.tar.gz: f901539b68bff0bf5bb736aaa3c4636ade982fe23adfe7ae70ba41a926186c9a
 SHA512:
-  metadata.gz: 99bcdac9eea8b1d62e0733b692c600be5e8dcb88710f58d22299b6daa7eabab5f5b01eeb83e61668709655869f6580e918ef38d11bb5ae781db4462b38abba65
-  data.tar.gz: ea96603b65d984edfeeb9aa9f9b760c9665af2c5df163e4c3dae076b24ebc10fccb8e1bdee1cc6e01761496a4b89b7454e80965d6a24e96eb67b53b17e46be84
+  metadata.gz: 49273e8a8c7133cbf4ef02f0c6bb66b64c19408e75a010cbce313557bcf22407b493efe57e66d760bf9327d68b32a2008981937780d2ce0cb2c047ec0a1c9fae
+  data.tar.gz: bf8a47eea31424c0f2a20c35063c73fd93353d9fd315f028b18591bb1cd3f4d3d52c83ae668713bc74d4618fd95dcc33006523aefb4fc54a1430dfaf38017621

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,48 @@
 # Changelog
+## 3.0.9
+#### Bug fixes
+* Text nodes merged in problem paragraphs through the `fix_errors` method now
+  have their "xml:space" attribute preserved.
+## 3.0.8
+#### Bug fixes
+* Fix a bug in the `usable_tags` method, so it now properly and expectedly
+  includes conditional tag names that have their opening tag markup as the sole
+  content of a paragraph (which represents conditional blocks where the
+  opening and closing tags are in separate paragraphs surrounding one or more
+  paragraphs that form the conditional block content).
+## 3.0.7
+#### Bug fixes
+* Fix a bug on `usable_tag_names` method, so it now properly and expectedly
+  includes conditional tag names as well, as before.
+## 3.0.6
+#### Bug fixes
+* Fix bug on `LMDocstache::Document#unusable_tags` method, where `nil` could be
+  passed to `broken_tags.delete_at` call.
+## 3.0.5
+#### Bug fixes and improvements
+* Improve the way broken tags are detected, making the algorithm wider in terms
+  of detecting broken tags, especially if the broken tag is the opening part of
+  conditional tag blocks (which was not being detected before these improvements).
+* Improve the way the paragraphs with "unusable" tags are traversed and have
+  their same-style texts merged (hence the "unusable" tags becoming usable). So,
+  from now on, `w:hyperlink` elements, for instance, are properly processed as
+  well.
 ## 3.0.4
 * Allow replacement `data` argument to be an `Array`. This feature allows replacing blocks
 in sequential order, following the order of the matching blocks.

data/lib/lm_docstache.rb CHANGED Viewed

@@ -1,9 +1,9 @@
 require 'nokogiri'
 require 'zip'
 require "lm_docstache/version"
+require "lm_docstache/parser"
 require "lm_docstache/document"
 require 'lm_docstache/hide_custom_tags'
-require "lm_docstache/parser"
 require "lm_docstache/condition"
 require "lm_docstache/conditional_block"
 require "lm_docstache/renderer"

data/lib/lm_docstache/document.rb CHANGED Viewed

@@ -1,7 +1,12 @@
 module LMDocstache
   class Document
-    TAGS_REGEXP = /{{.+?}}/
+    WHOLE_BLOCK_START_REGEX = /^#{Parser::BLOCK_START_PATTERN}$/
+    GENERAL_TAG_REGEX = /\{\{[\/#^]?(.+?)(?:(\s((?:==|~=))\s?.+?))?\}\}/
     ROLES_REGEXP = /({{(sig|sigfirm|date|check|text|initial)\|(req|noreq)\|(.+?)}})/
+    BLOCK_CHILDREN_ELEMENTS = 'w|r,w|hyperlink,w|ins,w|del'
+    RUN_LIKE_ELEMENTS = 'w|r,w|ins'
+    attr_reader :document
     def initialize(*paths)
       raise ArgumentError if paths.empty?
@@ -34,38 +39,41 @@ module LMDocstache
     def tags
       @documents.values.flat_map do |document|
-        document.text.strip.scan(TAGS_REGEXP)
+        document_text = document.text
+        extract_tag_names(document_text) + extract_tag_names(document_text, :full_block)
       end
     end
     def usable_tags
       @documents.values.reduce([]) do |tags, document|
         document.css('w|t').reduce(tags) do |document_tags, text_node|
-          document_tags.push(*text_node.text.scan(TAGS_REGEXP))
+          text = text_node.text
+          document_tags.push(*extract_tag_names(text))
+          document_tags.push(*extract_tag_names(text, :start_block))
+          document_tags.push(*extract_tag_names(text, :full_block))
         end
       end
     end
     def usable_tag_names
-      usable_tags.reject { |tag| tag =~ ROLES_REGEXP }.map do |tag|
-        tag.scan(/\{\{[\/#^]?(.+?)(?:(\s((?:==|~=))\s?.+?))?\}\}/)
-        $1
+      usable_tags.reduce([]) do |memo, tag|
+        next memo if !tag.is_a?(Regexp) && tag =~ ROLES_REGEXP
+        tag = unescape_escaped_start_block(tag.source) if tag.is_a?(Regexp)
+        memo << (tag.scan(GENERAL_TAG_REGEX) && $1)
       end.compact.uniq
     end
     def unusable_tags
-      unusable_tags = tags
+      usable_tags.reduce(tags) do |broken_tags, usable_tag|
+        next broken_tags unless index = broken_tags.index(usable_tag)
-      usable_tags.each do |usable_tag|
-        index = unusable_tags.index(usable_tag)
-        unusable_tags.delete_at(index) if index
+        broken_tags.delete_at(index) && broken_tags
       end
-      unusable_tags
     end
     def fix_errors
-      problem_paragraphs.each { |pg| flatten_paragraph(pg) if pg }
+      problem_paragraphs.each { |pg| flatten_text_blocks(pg) if pg }
     end
     def errors?
@@ -99,6 +107,28 @@ module LMDocstache
     private
+    def unescape_escaped_start_block(regex_source_string)
+      regex_source_string
+        .gsub('\\{', '{')
+        .gsub('\\#', '#')
+        .gsub('\\}', '}')
+        .gsub('\\^', '^')
+        .gsub('\\ ', ' ')
+    end
+    def extract_tag_names(text, tag_type = :variable)
+      text, regex, extractor =
+        if tag_type == :variable
+          [text, Parser::VARIABLE_MATCHER, ->(match) { "{{%s}}" % match }]
+        else
+          extractor = ->(match) { /#{Regexp.escape("{{%s%s %s %s}}" % match)}/ }
+          tag_type == :full_block ? [text, Parser::BLOCK_MATCHER, extractor] :
+            [text.strip, WHOLE_BLOCK_START_REGEX, extractor]
+        end
+      text.scan(regex).map(&extractor)
+    end
     def render_documents(data, text = nil, render_options = {})
       Hash[
         @documents.map do |(path, document)|
@@ -115,41 +145,51 @@ module LMDocstache
     def problem_paragraphs
       unusable_tags.flat_map do |tag|
         @documents.values.inject([]) do |tags, document|
-          faulty_paragraphs = document
-            .css('w|p')
-            .select { |paragraph| paragraph.text =~ /#{Regexp.escape(tag)}/ }
+          faulty_paragraphs = document.css('w|p').select do |paragraph|
+            tag_regex = tag.is_a?(Regexp) ? tag : /#{Regexp.escape(tag)}/
+            paragraph.text =~ tag_regex
+          end
           tags + faulty_paragraphs
         end
       end
     end
-    def flatten_paragraph(paragraph)
-      return if (run_nodes = paragraph.css('w|r')).size < 2
+    def flatten_text_blocks(runs_wrapper)
+      return if (children = filtered_children(runs_wrapper)).size < 2
-      while run_node = run_nodes.pop
-        next if run_nodes.empty?
+      while node = children.pop
+        is_run_node = node.matches?(RUN_LIKE_ELEMENTS)
+        previous_node = children.last
-        style_node = run_node.at_css('w|rPr')
+        if !is_run_node && filtered_children(node, RUN_LIKE_ELEMENTS).any?
+          next flatten_text_blocks(node)
+        end
+        next if !is_run_node || children.empty? || !previous_node.matches?(RUN_LIKE_ELEMENTS)
+        next if node.at_css('w|tab') || previous_node.at_css('w|tab')
+        style_node = node.at_css('w|rPr')
         style_html = style_node ? style_node.inner_html : ''
-        previous_run_node = run_nodes.last
-        previous_style_node = previous_run_node.at_css('w|rPr')
+        previous_style_node = previous_node.at_css('w|rPr')
         previous_style_html = previous_style_node ? previous_style_node.inner_html : ''
-        previous_text_node = previous_run_node.at_css('w|t')
-        current_text_node = run_node.at_css('w|t')
-        # avoid to merge blocks with tabs
-        next if run_node.at_css('w|tab')
-        next if previous_run_node.at_css('w|tab')
+        previous_text_node = previous_node.at_css('w|t')
+        current_text_node = node.at_css('w|t')
+        whitespace_attr = current_text_node['xml:space']
         next if style_html != previous_style_html
         next if current_text_node.nil? || previous_text_node.nil?
-        previous_text_node.content = previous_text_node.text + run_node.text
-        run_node.unlink
+        previous_text_node['xml:space'] = whitespace_attr if whitespace_attr
+        previous_text_node.content = previous_text_node.text + current_text_node.text
+        node.unlink
       end
     end
+    def filtered_children(node, selector = BLOCK_CHILDREN_ELEMENTS)
+      Nokogiri::XML::NodeSet.new(node.document, node.children.filter(selector))
+    end
     def unzip_read(zip, zip_path)
       file = zip.find_entry(zip_path)
       contents = ""

data/lib/lm_docstache/parser.rb CHANGED Viewed

@@ -184,7 +184,8 @@ module LMDocstache
     end
     def has_skippable_variable?(text)
-      return true if hide_custom_tags.find { |(pattern, value)| text =~ pattern }
+      return true if hide_custom_tags.find { |(pattern, _)| text =~ pattern }
       !!special_variable_replacements.find do |(pattern, value)|
         text =~ pattern && value == false
       end

data/lib/lm_docstache/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module LMDocstache
-  VERSION = "3.0.4"
+  VERSION = "3.0.9"
 end

data/spec/example_input/ExampleTemplate.docx CHANGED Viewed

Binary file

data/spec/integration_spec.rb CHANGED Viewed

@@ -1,5 +1,4 @@
 require 'spec_helper'
-require 'securerandom'
 require 'active_support/core_ext/object/blank.rb'
 module LMDocstache
@@ -63,7 +62,7 @@ describe 'integration test', integration: true do
     it 'fixes nested xml errors breaking tags' do
       expect { document.fix_errors }.to change {
         document.send(:problem_paragraphs).size
-      }.from(6).to(1)
+      }.from(10).to(1)
       expect(document.send(:problem_paragraphs).first.text).to eq(
         '{{TAG123-\\-//WITH WE👻IRD CHARS}}'
@@ -71,7 +70,18 @@ describe 'integration test', integration: true do
     end
     it 'has the expected amount of usable tags' do
-      expect(document.usable_tags.count).to eq(43)
+      expect { document.fix_errors }.to change {
+        document.usable_tags.count
+      }.from(29).to(37)
+    end
+    it 'keeps "xml:space" attribute when fixing errors' do
+      document.fix_errors
+      text_node = document.document.css('w|p').last
+        .css('w|t').find { |node| node.text.include?('that occurred on') }
+      expect(text_node['xml:space']).to eq('preserve')
     end
     it 'has the expected amount of usable roles tags' do
@@ -80,13 +90,14 @@ describe 'integration test', integration: true do
     end
     it 'has the expected amount of unique tag names' do
-      expect(document.usable_tag_names.count).to eq(19)
+      expect(document.usable_tag_names.count).to eq(20)
     end
     it 'renders file using data' do
       document.render_file(output_file, data)
     end
   end
   context "testing hide custom tags" do
     before do
       FileUtils.rm_rf(output_dir) if File.exist?(output_dir)
@@ -140,30 +151,5 @@ describe 'integration test', integration: true do
         expect(output).to include('<w:t xml:space="preserve">Test Multiple text in the same line </w:t>')
       end
     end
-    context "yoooo" do
-      let(:input_file) { "#{base_path}/multi_o.docx" }
-      let(:render_options) {
-        {
-          special_variable_replacements: { "(date|sig|sigfirm|text|check|initial|initials)\\|(req|noreq)\\|(.+?)" => false }.freeze,
-          hide_custom_tags: ['(?:sig|sigfirm|date|check|text|initial)\|(?:req|noreq)\|.+?']
-        }
-      }
-      let(:document) { LMDocstache::Document.new(input_file) }
-      it 'should have content replacement aligned with hide custom tags' do
-        doc = document
-        doc.fix_errors
-        new_file_path = "#{Time.now.to_i}-#{SecureRandom.uuid}.docx"
-        n = doc.render_file(new_file_path, { 'full_name' => 'fred document01' }, render_options)
-        noko = doc.render_xml({ 'full_name' => 'fred document01' }, render_options)
-        output = noko['word/document.xml'].to_xml
-        #puts output
-        #doc.render_file(new_file_path, { 'full_name' => 'fred document01' }, render_options)
-        #noko = doc.render_xml({ 'full_name' => 'fred document01' }, render_options)
-        #output = noko['word/document.xml'].to_xml
-        #puts output
-      end
-    end
   end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: lm_docstache
 version: !ruby/object:Gem::Version
-  version: 3.0.4
+  version: 3.0.9
 platform: ruby
 authors:
 - Roey Chasman
@@ -12,7 +12,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-05-14 00:00:00.000000000 Z
+date: 2021-07-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri