RubyGems - lm_docstache - Versions diffs - 3.0.0 → 3.0.5 - Mend

lm_docstache 3.0.0 → 3.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +37 -0
data/lib/lm_docstache.rb +1 -1
data/lib/lm_docstache/document.rb +70 -27
data/lib/lm_docstache/hide_custom_tags.rb +10 -9
data/lib/lm_docstache/parser.rb +46 -2
data/lib/lm_docstache/version.rb +1 -1
data/spec/example_input/ExampleTemplate.docx +0 -0
data/spec/example_input/sample-signature-with-tabs-spacing.docx +0 -0
data/spec/hide_custom_tags_spec.rb +9 -0
data/spec/integration_spec.rb +3 -3
metadata +4 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: f59dcaa224f4e8e899df674bc5bd296432ebb24e8e4f929be6b414b8f3a8e0a9
-  data.tar.gz: b05e837bccad1978807e82bd7d13da786d24fc796213f4f2859f6ebf023522f1
+  metadata.gz: cc1a3839b3cfabfd78b144d3f2862aa4a8bf1650bc4037220b2e5af4feaa4a31
+  data.tar.gz: d573c864a49f7e2dcc07122fc242664b597a588253408f065ad94cb0f2823f0a
 SHA512:
-  metadata.gz: 900ff62d4ada49c3645abfd6fbb64f30b6d7f7e0bf0d619d378c1702890dc25d15685e0c950b152242ca3438d9c4e09658788cc36171d0dacad57c37046eb545
-  data.tar.gz: abd44d43c73c3012b1512e56c2c0fa985295876217d66d86a83670c73e23054362a29899608a86e90893cc1b82ce5306770a57e45fdbf71f0bec5cacf6518900
+  metadata.gz: 7b0fb9ff483de6b2e4315206d1961f3ff847612519ed7ff11203f8ca9e7aabd35fc8a5f8730ff552c171082a0d29d2a1a126f86e89a3a117e3a10ab0a5fb5222
+  data.tar.gz: a8c510d591b10ee2d66e6c3ac85995ee9eadb8d91f2178c127e70c12b4184d840cfa725929d4d02b40e62b2e2f9f2c7d629bf65866164833e421a5fdbb5e5c7b

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,42 @@
 # Changelog
+## 3.0.5
+#### Bug fixes and improvements
+* Improve the way broken tags are detected, making the algorithm wider in terms
+  of detecting broken tags, especially if the broken tag is the opening part of
+  conditional tag blocks (which was being detected before these improvements).
+* Improve the way the paragraphs with "unusable" tags are traversed and have
+  their same-style texts merged (hence the "unusable" tags becoming usable). So,
+  from now on, `w:hyperlink` elements, for instance, are properly processed as
+  well.
+## 3.0.4
+* Allow values of the replacement `data` argument to be an `Array`. This feature allows replacing
+matching blocks sequentially, following the order in which they appear in the document.
+## 3.0.3
+### Bugfix
+* The hide custom tags feature was pushing block tags to the end of the paragraph. There are cases where this
+ approach doesn't work, so it was changed to an ordered, in-place replacement when hide tags are matched.
+* Avoid merging runs that contain tab tags in the fix-errors methods. This was causing unexpected document changes.
+## 3.0.2
+### Bugfix
+* Fix replacing tags related to hidden custom tags regexp formats. E.g. tab characters.
+## 3.0.1
+### Bugfix
+* Fix the Hide Custom Tag feature for documents where there is no text inside a `w|r`, in which case we
+  can't split content.
 ## 3.0.0
 ## Breaking Changes

data/lib/lm_docstache.rb CHANGED Viewed

@@ -1,9 +1,9 @@
 require 'nokogiri'
 require 'zip'
 require "lm_docstache/version"
+require "lm_docstache/parser"
 require "lm_docstache/document"
 require 'lm_docstache/hide_custom_tags'
-require "lm_docstache/parser"
 require "lm_docstache/condition"
 require "lm_docstache/conditional_block"
 require "lm_docstache/renderer"

data/lib/lm_docstache/document.rb CHANGED Viewed

@@ -1,7 +1,10 @@
 module LMDocstache
   class Document
-    TAGS_REGEXP = /{{.+?}}/
+    WHOLE_BLOCK_START_REGEX = /^#{Parser::BLOCK_START_PATTERN}$/
+    GENERAL_TAG_REGEX = /\{\{[\/#^]?(.+?)(?:(\s((?:==|~=))\s?.+?))?\}\}/
     ROLES_REGEXP = /({{(sig|sigfirm|date|check|text|initial)\|(req|noreq)\|(.+?)}})/
+    BLOCK_CHILDREN_ELEMENTS = 'w|r,w|hyperlink,w|ins,w|del'
+    RUN_LIKE_ELEMENTS = 'w|r,w|ins'
     def initialize(*paths)
       raise ArgumentError if paths.empty?
@@ -34,38 +37,48 @@ module LMDocstache
     def tags
       @documents.values.flat_map do |document|
-        document.text.strip.scan(TAGS_REGEXP)
+        document_text = document.text
+        extract_tag_names(document_text) + extract_tag_names(document_text, true)
       end
     end
     def usable_tags
       @documents.values.reduce([]) do |tags, document|
         document.css('w|t').reduce(tags) do |document_tags, text_node|
-          document_tags.push(*text_node.text.scan(TAGS_REGEXP))
+          text = text_node.text
+          document_tags.push(*extract_tag_names(text))
+          document_tags.push(*extract_tag_names(text, true))
         end
       end
     end
     def usable_tag_names
-      usable_tags.reject { |tag| tag =~ ROLES_REGEXP }.map do |tag|
-        tag.scan(/\{\{[\/#^]?(.+?)(?:(\s((?:==|~=))\s?.+?))?\}\}/)
-        $1
+      usable_tags.reduce([]) do |memo, tag|
+        next memo if !tag.is_a?(Regexp) && tag =~ ROLES_REGEXP
+        tag = tag.source if tag.is_a?(Regexp)
+        memo << (tag.scan(GENERAL_TAG_REGEX) && $1)
       end.compact.uniq
     end
     def unusable_tags
-      unusable_tags = tags
+      conditional_start_tags = text_nodes_containing_only_starting_conditionals.map(&:text)
+      usable_tags.reduce(tags) do |broken_tags, usable_tag|
+        broken_tags.delete_at(broken_tags.index(usable_tag)) && broken_tags
+      end.reject do |broken_tag|
+        operator = broken_tag.is_a?(Regexp) ? :=~ : :==
+        start_tags_index = conditional_start_tags.find_index do |start_tag|
+          broken_tag.send(operator, start_tag)
+        end
-      usable_tags.each do |usable_tag|
-        index = unusable_tags.index(usable_tag)
-        unusable_tags.delete_at(index) if index
+        conditional_start_tags.delete_at(start_tags_index) if start_tags_index
+        !!start_tags_index
       end
-      unusable_tags
     end
     def fix_errors
-      problem_paragraphs.each { |pg| flatten_paragraph(pg) if pg }
+      problem_paragraphs.each { |pg| flatten_text_blocks(pg) if pg }
     end
     def errors?
@@ -99,6 +112,25 @@ module LMDocstache
     private
+    def text_nodes_containing_only_starting_conditionals
+      @documents.values.flat_map do |document|
+        document.css('w|t').select do |paragraph|
+          paragraph.text =~ WHOLE_BLOCK_START_REGEX
+        end
+      end
+    end
+    def extract_tag_names(text, conditional_tag = false)
+      if conditional_tag
+        text.scan(Parser::BLOCK_MATCHER).map do |match|
+          start_block_tag = "{{#{match[0]}#{match[1]} #{match[2]} #{match[3]}}}"
+          /#{Regexp.escape(start_block_tag)}/
+        end
+      else
+        text.scan(Parser::VARIABLE_MATCHER).map { |match| "{{#{match[0]}}}" }
+      end
+    end
     def render_documents(data, text = nil, render_options = {})
       Hash[
         @documents.map do |(path, document)|
@@ -115,37 +147,48 @@ module LMDocstache
     def problem_paragraphs
       unusable_tags.flat_map do |tag|
         @documents.values.inject([]) do |tags, document|
-          faulty_paragraphs = document
-            .css('w|p')
-            .select { |paragraph| paragraph.text =~ /#{Regexp.escape(tag)}/ }
+          faulty_paragraphs = document.css('w|p').select do |paragraph|
+            tag_regex = tag.is_a?(Regexp) ? tag : /#{Regexp.escape(tag)}/
+            paragraph.text =~ tag_regex
+          end
           tags + faulty_paragraphs
         end
       end
     end
-    def flatten_paragraph(paragraph)
-      return if (run_nodes = paragraph.css('w|r')).size < 2
+    def flatten_text_blocks(runs_wrapper)
+      return if (children = filtered_children(runs_wrapper)).size < 2
-      while run_node = run_nodes.pop
-        next if run_nodes.empty?
+      while node = children.pop
+        is_run_node = node.matches?(RUN_LIKE_ELEMENTS)
+        previous_node = children.last
-        style_node = run_node.at_css('w|rPr')
+        if !is_run_node && filtered_children(node, RUN_LIKE_ELEMENTS).any?
+          next flatten_text_blocks(node)
+        end
+        next if !is_run_node || children.empty? || !previous_node.matches?(RUN_LIKE_ELEMENTS)
+        next if node.at_css('w|tab') || previous_node.at_css('w|tab')
+        style_node = node.at_css('w|rPr')
         style_html = style_node ? style_node.inner_html : ''
-        previous_run_node = run_nodes.last
-        previous_style_node = previous_run_node.at_css('w|rPr')
+        previous_style_node = previous_node.at_css('w|rPr')
         previous_style_html = previous_style_node ? previous_style_node.inner_html : ''
-        previous_text_node = previous_run_node.at_css('w|t')
-        current_text_node = run_node.at_css('w|t')
+        previous_text_node = previous_node.at_css('w|t')
+        current_text_node = node.at_css('w|t')
         next if style_html != previous_style_html
         next if current_text_node.nil? || previous_text_node.nil?
-        previous_text_node.content = previous_text_node.text + run_node.text
-        run_node.unlink
+        previous_text_node.content = previous_text_node.text + current_text_node.text
+        node.unlink
       end
     end
+    def filtered_children(node, selector = BLOCK_CHILDREN_ELEMENTS)
+      Nokogiri::XML::NodeSet.new(node.document, node.children.filter(selector))
+    end
     def unzip_read(zip, zip_path)
       file = zip.find_entry(zip_path)
       contents = ""

data/lib/lm_docstache/hide_custom_tags.rb CHANGED Viewed

@@ -28,16 +28,17 @@ module LMDocstache
           next unless paragraph.text =~ full_pattern
           run_nodes = paragraph.css('w|r')
           while run_node = run_nodes.shift
-            next if run_node.text.to_s.strip.size == 0
-            remainder_run_node = run_node.clone
-            run_node.unlink
-            tag_contents = split_tag_content(remainder_run_node.text, full_pattern)
+            next unless run_node.at_css('w|t')
+            next unless run_node.text =~ full_pattern
+            tag_contents = split_tag_content(run_node.text, full_pattern)
+            replacement_nodes = []
             tag_contents[:content_list].each_with_index do |content, idx|
+              remainder_run_node = run_node.clone
               replace_content(remainder_run_node, content)
-              run_node_with_match = remainder_run_node.dup
               matched_tag = tag_contents[:matched_tags][idx]
-              nodes_list = [remainder_run_node]
+              replacement_nodes << remainder_run_node
               if matched_tag
+                run_node_with_match = run_node.clone
                 replace_style(run_node_with_match)
                 matched_content = matched_tag
                 if value
@@ -46,11 +47,11 @@ module LMDocstache
                        value.to_s
                 end
                 replace_content(run_node_with_match, matched_content)
-                nodes_list << run_node_with_match
+                replacement_nodes << run_node_with_match
               end
-              paragraph << Nokogiri::XML::NodeSet.new(document, nodes_list)
-              remainder_run_node = remainder_run_node.clone
             end
+            run_node.add_next_sibling(Nokogiri::XML::NodeSet.new(document, replacement_nodes))
+            run_node.unlink
           end
         end
       end

data/lib/lm_docstache/parser.rb CHANGED Viewed

@@ -18,7 +18,22 @@ module LMDocstache
     VARIABLE_MATCHER = /{{([^#\^\/].*?)}}/
     attr_reader :document, :data, :blocks, :special_variable_replacements, :hide_custom_tags
+    attr_reader :data_sequential_replacement
+    # Constructor +data+ argument is a +Hash+ where the key is
+    # expected to be a +String+ representing the replacement block value. +Hash+
+    # key must not contain the `{{}}` part, but only the pattern characters.
+    # As for the values of the +Hash+, we have options:
+    #
+    # * +String+  will be the value that will replace matching string.
+    # * +Array<String>+ will be an ordered sequence of values that will replace the matched string following
+    # document matching order.
+    #
+    # Example:
+    # { 'full_name' => 'John Doe', 'text|req|Client' => ['John', 'Matt', 'Paul'] }
+    #
+    # Constructor +options+ argument is a +Hash+ where keys can be:
+    #
     # The +special_variable_replacements+ option is a +Hash+ where the key is
     # expected to be either a +Regexp+ or a +String+ representing the pattern
     # of more specific type of variables that deserves a special treatment. The
@@ -47,7 +62,8 @@ module LMDocstache
     #   will be the value that will replace the matched string
     def initialize(document, data, options = {})
       @document = document
-      @data = data.transform_keys(&:to_s)
+      @data = data.transform_keys(&:to_s).select {|e, v| !v.is_a?(Array) }
+      @data_sequential_replacement = data.transform_keys(&:to_s).select {|e, v| v.is_a?(Array) }
       @special_variable_replacements = add_blocks_to_regexp(options.fetch(:special_variable_replacements, {}))
       @hide_custom_tags = add_blocks_to_regexp(options.fetch(:hide_custom_tags, {}))
     end
@@ -65,6 +81,7 @@ module LMDocstache
       hide_custom_tags!
       find_blocks
       replace_conditional_blocks_in_document!
+      replace_data_sequentially_in_document!
       replace_variables_in_document!
     end
@@ -140,8 +157,35 @@ module LMDocstache
       end
     end
+    def replace_data_sequentially_in_document!
+      data_sequential_replacement.each do |tag_key, values|
+        tag = Regexp.escape("{{#{tag_key}}}")
+        pattern_found = 0
+        document.css('w|t').each do |text_node|
+          text = text_node.text
+          if text.match(tag)
+            text.gsub!(/#{tag}/) do |_match|
+              value = values[pattern_found]
+              # if there is no more available value replace the content with empty string
+              return '' unless value
+              pattern_found +=1
+              value
+            end
+            text_node.content = text
+          end
+        end
+      end
+    end
     def has_skippable_variable?(text)
-      return true if hide_custom_tags.find { |(pattern, value)| text =~ pattern }
+      return true if hide_custom_tags.find { |(pattern, _)| text =~ pattern }
       !!special_variable_replacements.find do |(pattern, value)|
         text =~ pattern && value == false
       end

data/lib/lm_docstache/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module LMDocstache
-  VERSION = "3.0.0"
+  VERSION = "3.0.5"
 end

data/spec/example_input/ExampleTemplate.docx CHANGED Viewed

Binary file

data/spec/example_input/sample-signature-with-tabs-spacing.docx ADDED Viewed

Binary file

data/spec/hide_custom_tags_spec.rb CHANGED Viewed

@@ -85,5 +85,14 @@ describe LMDocstache::HideCustomTags do
         expect(total_replacement).to eq(2)
       end
     end
+    context 'giving a document with tabs spacing in the middle of replacement tags' do
+      let(:input_file) { "#{base_path}/sample-signature-with-tabs-spacing.docx" }
+      it 'expect to not replace tabs' do
+        hide_custom_tags.hide_custom_tags!
+        d = hide_custom_tags.document
+        expect(d.css('w|p w|tab').size).to eq(11)
+      end
+    end
   end
 end

data/spec/integration_spec.rb CHANGED Viewed

@@ -62,7 +62,7 @@ describe 'integration test', integration: true do
     it 'fixes nested xml errors breaking tags' do
       expect { document.fix_errors }.to change {
         document.send(:problem_paragraphs).size
-      }.from(6).to(1)
+      }.from(7).to(1)
       expect(document.send(:problem_paragraphs).first.text).to eq(
         '{{TAG123-\\-//WITH WE👻IRD CHARS}}'
@@ -70,7 +70,7 @@ describe 'integration test', integration: true do
     end
     it 'has the expected amount of usable tags' do
-      expect(document.usable_tags.count).to eq(43)
+      expect(document.usable_tags.count).to eq(21)
     end
     it 'has the expected amount of usable roles tags' do
@@ -79,7 +79,7 @@ describe 'integration test', integration: true do
     end
     it 'has the expected amount of unique tag names' do
-      expect(document.usable_tag_names.count).to eq(19)
+      expect(document.usable_tag_names.count).to eq(14)
     end
     it 'renders file using data' do

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: lm_docstache
 version: !ruby/object:Gem::Version
-  version: 3.0.0
+  version: 3.0.5
 platform: ruby
 authors:
 - Roey Chasman
@@ -12,7 +12,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-03-23 00:00:00.000000000 Z
+date: 2021-06-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -123,6 +123,7 @@ files:
 - spec/example_input/blank.docx
 - spec/example_input/docx-no-rpr.docx
 - spec/example_input/sample-signature-blue.docx
+- spec/example_input/sample-signature-with-tabs-spacing.docx
 - spec/example_input/sample-signature.docx
 - spec/hide_custom_tags_spec.rb
 - spec/integration_spec.rb
@@ -157,6 +158,7 @@ test_files:
 - spec/example_input/blank.docx
 - spec/example_input/docx-no-rpr.docx
 - spec/example_input/sample-signature-blue.docx
+- spec/example_input/sample-signature-with-tabs-spacing.docx
 - spec/example_input/sample-signature.docx
 - spec/hide_custom_tags_spec.rb
 - spec/integration_spec.rb