RubyGems - lm_docstache - Versions diffs - 3.0.4 → 3.0.5 - Mend

lm_docstache 3.0.4 → 3.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +12 -0
data/lib/lm_docstache.rb +1 -1
data/lib/lm_docstache/document.rb +70 -31
data/lib/lm_docstache/parser.rb +2 -1
data/lib/lm_docstache/version.rb +1 -1
data/spec/example_input/ExampleTemplate.docx +0 -0
data/spec/integration_spec.rb +3 -29
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: ce8fea2c12829636bd22622e1c022cf2ead4ec09997b7e13322f2b94b4261654
-  data.tar.gz: a25d02153cb1a53bf74111dc59710d83a133a821c4632607c60e06531b28b4ad
+  metadata.gz: cc1a3839b3cfabfd78b144d3f2862aa4a8bf1650bc4037220b2e5af4feaa4a31
+  data.tar.gz: d573c864a49f7e2dcc07122fc242664b597a588253408f065ad94cb0f2823f0a
 SHA512:
-  metadata.gz: 99bcdac9eea8b1d62e0733b692c600be5e8dcb88710f58d22299b6daa7eabab5f5b01eeb83e61668709655869f6580e918ef38d11bb5ae781db4462b38abba65
-  data.tar.gz: ea96603b65d984edfeeb9aa9f9b760c9665af2c5df163e4c3dae076b24ebc10fccb8e1bdee1cc6e01761496a4b89b7454e80965d6a24e96eb67b53b17e46be84
+  metadata.gz: 7b0fb9ff483de6b2e4315206d1961f3ff847612519ed7ff11203f8ca9e7aabd35fc8a5f8730ff552c171082a0d29d2a1a126f86e89a3a117e3a10ab0a5fb5222
+  data.tar.gz: a8c510d591b10ee2d66e6c3ac85995ee9eadb8d91f2178c127e70c12b4184d840cfa725929d4d02b40e62b2e2f9f2c7d629bf65866164833e421a5fdbb5e5c7b

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,17 @@
 # Changelog
+## 3.0.5
+#### Bug fixes and improvements
+* Improve the way broken tags are detected, making the algorithm broader in terms
+  of detecting broken tags, especially if the broken tag is the opening part of
+  conditional tag blocks (which was being detected before these improvements).
+* Improve the way the paragraphs with "unusable" tags are traversed and have
+  their same-style texts merged (hence the "unusable" tags becoming usable). So,
+  from now on, `w:hyperlink` elements, for instance, are properly processed as
+  well.
 ## 3.0.4
 * Allow the replacement `data` argument to be an `Array`. This feature allows replacing blocks
 sequentially, following the order of the matching blocks.

data/lib/lm_docstache.rb CHANGED Viewed

@@ -1,9 +1,9 @@
 require 'nokogiri'
 require 'zip'
 require "lm_docstache/version"
+require "lm_docstache/parser"
 require "lm_docstache/document"
 require 'lm_docstache/hide_custom_tags'
-require "lm_docstache/parser"
 require "lm_docstache/condition"
 require "lm_docstache/conditional_block"
 require "lm_docstache/renderer"

data/lib/lm_docstache/document.rb CHANGED Viewed

@@ -1,7 +1,10 @@
 module LMDocstache
   class Document
-    TAGS_REGEXP = /{{.+?}}/
+    WHOLE_BLOCK_START_REGEX = /^#{Parser::BLOCK_START_PATTERN}$/
+    GENERAL_TAG_REGEX = /\{\{[\/#^]?(.+?)(?:(\s((?:==|~=))\s?.+?))?\}\}/
     ROLES_REGEXP = /({{(sig|sigfirm|date|check|text|initial)\|(req|noreq)\|(.+?)}})/
+    BLOCK_CHILDREN_ELEMENTS = 'w|r,w|hyperlink,w|ins,w|del'
+    RUN_LIKE_ELEMENTS = 'w|r,w|ins'
     def initialize(*paths)
       raise ArgumentError if paths.empty?
@@ -34,38 +37,48 @@ module LMDocstache
     def tags
       @documents.values.flat_map do |document|
-        document.text.strip.scan(TAGS_REGEXP)
+        document_text = document.text
+        extract_tag_names(document_text) + extract_tag_names(document_text, true)
       end
     end
     def usable_tags
       @documents.values.reduce([]) do |tags, document|
         document.css('w|t').reduce(tags) do |document_tags, text_node|
-          document_tags.push(*text_node.text.scan(TAGS_REGEXP))
+          text = text_node.text
+          document_tags.push(*extract_tag_names(text))
+          document_tags.push(*extract_tag_names(text, true))
         end
       end
     end
     def usable_tag_names
-      usable_tags.reject { |tag| tag =~ ROLES_REGEXP }.map do |tag|
-        tag.scan(/\{\{[\/#^]?(.+?)(?:(\s((?:==|~=))\s?.+?))?\}\}/)
-        $1
+      usable_tags.reduce([]) do |memo, tag|
+        next memo if !tag.is_a?(Regexp) && tag =~ ROLES_REGEXP
+        tag = tag.source if tag.is_a?(Regexp)
+        memo << (tag.scan(GENERAL_TAG_REGEX) && $1)
       end.compact.uniq
     end
     def unusable_tags
-      unusable_tags = tags
+      conditional_start_tags = text_nodes_containing_only_starting_conditionals.map(&:text)
+      usable_tags.reduce(tags) do |broken_tags, usable_tag|
+        broken_tags.delete_at(broken_tags.index(usable_tag)) && broken_tags
+      end.reject do |broken_tag|
+        operator = broken_tag.is_a?(Regexp) ? :=~ : :==
+        start_tags_index = conditional_start_tags.find_index do |start_tag|
+          broken_tag.send(operator, start_tag)
+        end
-      usable_tags.each do |usable_tag|
-        index = unusable_tags.index(usable_tag)
-        unusable_tags.delete_at(index) if index
+        conditional_start_tags.delete_at(start_tags_index) if start_tags_index
+        !!start_tags_index
       end
-      unusable_tags
     end
     def fix_errors
-      problem_paragraphs.each { |pg| flatten_paragraph(pg) if pg }
+      problem_paragraphs.each { |pg| flatten_text_blocks(pg) if pg }
     end
     def errors?
@@ -99,6 +112,25 @@ module LMDocstache
     private
+    def text_nodes_containing_only_starting_conditionals
+      @documents.values.flat_map do |document|
+        document.css('w|t').select do |paragraph|
+          paragraph.text =~ WHOLE_BLOCK_START_REGEX
+        end
+      end
+    end
+    def extract_tag_names(text, conditional_tag = false)
+      if conditional_tag
+        text.scan(Parser::BLOCK_MATCHER).map do |match|
+          start_block_tag = "{{#{match[0]}#{match[1]} #{match[2]} #{match[3]}}}"
+          /#{Regexp.escape(start_block_tag)}/
+        end
+      else
+        text.scan(Parser::VARIABLE_MATCHER).map { |match| "{{#{match[0]}}}" }
+      end
+    end
     def render_documents(data, text = nil, render_options = {})
       Hash[
         @documents.map do |(path, document)|
@@ -115,41 +147,48 @@ module LMDocstache
     def problem_paragraphs
       unusable_tags.flat_map do |tag|
         @documents.values.inject([]) do |tags, document|
-          faulty_paragraphs = document
-            .css('w|p')
-            .select { |paragraph| paragraph.text =~ /#{Regexp.escape(tag)}/ }
+          faulty_paragraphs = document.css('w|p').select do |paragraph|
+            tag_regex = tag.is_a?(Regexp) ? tag : /#{Regexp.escape(tag)}/
+            paragraph.text =~ tag_regex
+          end
           tags + faulty_paragraphs
         end
       end
     end
-    def flatten_paragraph(paragraph)
-      return if (run_nodes = paragraph.css('w|r')).size < 2
+    def flatten_text_blocks(runs_wrapper)
+      return if (children = filtered_children(runs_wrapper)).size < 2
+      while node = children.pop
+        is_run_node = node.matches?(RUN_LIKE_ELEMENTS)
+        previous_node = children.last
-      while run_node = run_nodes.pop
-        next if run_nodes.empty?
+        if !is_run_node && filtered_children(node, RUN_LIKE_ELEMENTS).any?
+          next flatten_text_blocks(node)
+        end
+        next if !is_run_node || children.empty? || !previous_node.matches?(RUN_LIKE_ELEMENTS)
+        next if node.at_css('w|tab') || previous_node.at_css('w|tab')
-        style_node = run_node.at_css('w|rPr')
+        style_node = node.at_css('w|rPr')
         style_html = style_node ? style_node.inner_html : ''
-        previous_run_node = run_nodes.last
-        previous_style_node = previous_run_node.at_css('w|rPr')
+        previous_style_node = previous_node.at_css('w|rPr')
         previous_style_html = previous_style_node ? previous_style_node.inner_html : ''
-        previous_text_node = previous_run_node.at_css('w|t')
-        current_text_node = run_node.at_css('w|t')
-        # avoid to merge blocks with tabs
-        next if run_node.at_css('w|tab')
-        next if previous_run_node.at_css('w|tab')
+        previous_text_node = previous_node.at_css('w|t')
+        current_text_node = node.at_css('w|t')
         next if style_html != previous_style_html
         next if current_text_node.nil? || previous_text_node.nil?
-        previous_text_node.content = previous_text_node.text + run_node.text
-        run_node.unlink
+        previous_text_node.content = previous_text_node.text + current_text_node.text
+        node.unlink
       end
     end
+    def filtered_children(node, selector = BLOCK_CHILDREN_ELEMENTS)
+      Nokogiri::XML::NodeSet.new(node.document, node.children.filter(selector))
+    end
     def unzip_read(zip, zip_path)
       file = zip.find_entry(zip_path)
       contents = ""

data/lib/lm_docstache/parser.rb CHANGED Viewed

@@ -184,7 +184,8 @@ module LMDocstache
     end
     def has_skippable_variable?(text)
-      return true if hide_custom_tags.find { |(pattern, value)| text =~ pattern }
+      return true if hide_custom_tags.find { |(pattern, _)| text =~ pattern }
       !!special_variable_replacements.find do |(pattern, value)|
         text =~ pattern && value == false
       end

data/lib/lm_docstache/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module LMDocstache
-  VERSION = "3.0.4"
+  VERSION = "3.0.5"
 end

data/spec/example_input/ExampleTemplate.docx CHANGED Viewed

Binary file

data/spec/integration_spec.rb CHANGED Viewed

@@ -1,5 +1,4 @@
 require 'spec_helper'
-require 'securerandom'
 require 'active_support/core_ext/object/blank.rb'
 module LMDocstache
@@ -63,7 +62,7 @@ describe 'integration test', integration: true do
     it 'fixes nested xml errors breaking tags' do
       expect { document.fix_errors }.to change {
         document.send(:problem_paragraphs).size
-      }.from(6).to(1)
+      }.from(7).to(1)
       expect(document.send(:problem_paragraphs).first.text).to eq(
         '{{TAG123-\\-//WITH WE👻IRD CHARS}}'
@@ -71,7 +70,7 @@ describe 'integration test', integration: true do
     end
     it 'has the expected amount of usable tags' do
-      expect(document.usable_tags.count).to eq(43)
+      expect(document.usable_tags.count).to eq(21)
     end
     it 'has the expected amount of usable roles tags' do
@@ -80,7 +79,7 @@ describe 'integration test', integration: true do
     end
     it 'has the expected amount of unique tag names' do
-      expect(document.usable_tag_names.count).to eq(19)
+      expect(document.usable_tag_names.count).to eq(14)
     end
     it 'renders file using data' do
@@ -140,30 +139,5 @@ describe 'integration test', integration: true do
         expect(output).to include('<w:t xml:space="preserve">Test Multiple text in the same line </w:t>')
       end
     end
-    context "yoooo" do
-      let(:input_file) { "#{base_path}/multi_o.docx" }
-      let(:render_options) {
-        {
-          special_variable_replacements: { "(date|sig|sigfirm|text|check|initial|initials)\\|(req|noreq)\\|(.+?)" => false }.freeze,
-          hide_custom_tags: ['(?:sig|sigfirm|date|check|text|initial)\|(?:req|noreq)\|.+?']
-        }
-      }
-      let(:document) { LMDocstache::Document.new(input_file) }
-      it 'should have content replacement aligned with hide custom tags' do
-        doc = document
-        doc.fix_errors
-        new_file_path = "#{Time.now.to_i}-#{SecureRandom.uuid}.docx"
-        n = doc.render_file(new_file_path, { 'full_name' => 'fred document01' }, render_options)
-        noko = doc.render_xml({ 'full_name' => 'fred document01' }, render_options)
-        output = noko['word/document.xml'].to_xml
-        #puts output
-        #doc.render_file(new_file_path, { 'full_name' => 'fred document01' }, render_options)
-        #noko = doc.render_xml({ 'full_name' => 'fred document01' }, render_options)
-        #output = noko['word/document.xml'].to_xml
-        #puts output
-      end
-    end
   end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: lm_docstache
 version: !ruby/object:Gem::Version
-  version: 3.0.4
+  version: 3.0.5
 platform: ruby
 authors:
 - Roey Chasman
@@ -12,7 +12,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-05-14 00:00:00.000000000 Z
+date: 2021-06-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri