ghostwriter 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/RELEASE_NOTES.md +14 -0
- data/lib/ghostwriter/version.rb +1 -1
- data/lib/ghostwriter/writer.rb +39 -18
- metadata +1 -1
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 7851b6a60c2dc482938d9ea35844c9508384e508c6f20dc1bf73fcdeb91d4d15
         | 
| 4 | 
            +
              data.tar.gz: e35c2c30d5a523b07e05c73764c24b9e60eb361f72b48ece4f10bb254f856d67
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: fd0dc41dfb3f473eaa47ec5b6f148cf1984c358970382e508255342b320e67c83953c45e01a07a2dd4cf0feb286375608243f7c8550843b07f96a583c1fc415d
         | 
| 7 | 
            +
              data.tar.gz: 01d7a3b8728f17a131d3dd5aab5d20799b271a9c84fcc04ee2cb29bf38506a856043bfce32ff69db68f6ccd5a727cab685b259c03d26e4fd5e68e9d6c45c5dd7
         | 
    
        data/RELEASE_NOTES.md
    CHANGED
    
    | @@ -1,5 +1,19 @@ | |
| 1 1 | 
             
            # Release Notes
         | 
| 2 2 |  | 
| 3 | 
            +
            ## 0.4.1 (2021-03-17)
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            ### Major
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            * none
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            ### Minor
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            * No longer provides link target in brackets after link text when they are the same
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            ### Bugfixes
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            * Added explicit testing for HTML entity interpretation
         | 
| 16 | 
            +
             | 
| 3 17 | 
             
            ## 0.4.0 (2021-03-16)
         | 
| 4 18 |  | 
| 5 19 | 
             
            ### Major
         | 
    
        data/lib/ghostwriter/version.rb
    CHANGED
    
    
    
        data/lib/ghostwriter/writer.rb
    CHANGED
    
    | @@ -20,7 +20,7 @@ module Ghostwriter | |
| 20 20 |  | 
| 21 21 | 
             
                     replace_anchors(doc, link_base)
         | 
| 22 22 | 
             
                     replace_headers(doc)
         | 
| 23 | 
            -
                      | 
| 23 | 
            +
                     replace_tables(doc)
         | 
| 24 24 |  | 
| 25 25 | 
             
                     simple_replace(doc, 'hr', "\n----------\n")
         | 
| 26 26 | 
             
                     simple_replace(doc, 'br', "\n")
         | 
| @@ -40,33 +40,40 @@ module Ghostwriter | |
| 40 40 | 
             
                  end
         | 
| 41 41 |  | 
| 42 42 | 
             
                  def replace_anchors(doc, link_base)
         | 
| 43 | 
            -
                      | 
| 44 | 
            -
                     base     = doc.search('base').first
         | 
| 45 | 
            -
                     base_url = base ? base['href'] : link_base
         | 
| 43 | 
            +
                     base = get_link_base(doc, default: link_base)
         | 
| 46 44 |  | 
| 47 45 | 
             
                     doc.search('a').each do |link_node|
         | 
| 48 46 | 
             
                        href = URI(link_node['href'])
         | 
| 49 | 
            -
                        href =  | 
| 47 | 
            +
                        href = base + href.to_s unless href.absolute?
         | 
| 50 48 |  | 
| 51 | 
            -
                        link_node.inner_html =  | 
| 49 | 
            +
                        link_node.inner_html = if link_matches(href, link_node.inner_html)
         | 
| 50 | 
            +
                                                  href.to_s
         | 
| 51 | 
            +
                                               else
         | 
| 52 | 
            +
                                                  "#{ link_node.inner_html } (#{ href })"
         | 
| 53 | 
            +
                                               end
         | 
| 52 54 | 
             
                     end
         | 
| 53 55 | 
             
                  end
         | 
| 54 56 |  | 
| 57 | 
            +
                  def link_matches(first, second)
         | 
| 58 | 
            +
                     first.to_s.gsub(%r{^https?://}, '').chomp('/') == second.gsub(%r{^https?://}, '').chomp('/')
         | 
| 59 | 
            +
                  end
         | 
| 60 | 
            +
             | 
| 61 | 
            +
                  def get_link_base(doc, default:)
         | 
| 62 | 
            +
                     # <base> node is unique by W3C spec
         | 
| 63 | 
            +
                     base_node = doc.search('base').first
         | 
| 64 | 
            +
             | 
| 65 | 
            +
                     base_node ? base_node['href'] : default
         | 
| 66 | 
            +
                  end
         | 
| 67 | 
            +
             | 
| 55 68 | 
             
                  def replace_headers(doc)
         | 
| 56 69 | 
             
                     doc.search('header, h1, h2, h3, h4, h5, h6').each do |node|
         | 
| 57 70 | 
             
                        node.inner_html = "- #{ node.inner_html } -\n".squeeze(' ')
         | 
| 58 71 | 
             
                     end
         | 
| 59 72 | 
             
                  end
         | 
| 60 73 |  | 
| 61 | 
            -
                  def  | 
| 74 | 
            +
                  def replace_tables(doc)
         | 
| 62 75 | 
             
                     doc.css('table').each do |table|
         | 
| 63 | 
            -
                        column_sizes = table | 
| 64 | 
            -
                           row.search('th', 'td').collect do |node|
         | 
| 65 | 
            -
                              node.inner_html.length
         | 
| 66 | 
            -
                           end
         | 
| 67 | 
            -
                        end
         | 
| 68 | 
            -
             | 
| 69 | 
            -
                        column_sizes = column_sizes.transpose.collect(&:max)
         | 
| 76 | 
            +
                        column_sizes = calculate_column_sizes(table)
         | 
| 70 77 |  | 
| 71 78 | 
             
                        table.search('./thead/tr', './tbody/tr', './tr').each do |row|
         | 
| 72 79 | 
             
                           replace_table_nodes(row, column_sizes)
         | 
| @@ -74,13 +81,27 @@ module Ghostwriter | |
| 74 81 | 
             
                           row.inner_html = "#{ row.inner_html }|\n"
         | 
| 75 82 | 
             
                        end
         | 
| 76 83 |  | 
| 77 | 
            -
                        table | 
| 78 | 
            -
                           header_bottom = "|#{ column_sizes.collect { |len| ('-' * (len + 2)) }.join('|') }|"
         | 
| 84 | 
            +
                        add_table_header_underline(table, column_sizes)
         | 
| 79 85 |  | 
| 80 | 
            -
             | 
| 86 | 
            +
                        table.inner_html = "#{ table.inner_html }\n"
         | 
| 87 | 
            +
                     end
         | 
| 88 | 
            +
                  end
         | 
| 89 | 
            +
             | 
| 90 | 
            +
                  def calculate_column_sizes(table)
         | 
| 91 | 
            +
                     column_sizes = table.search('tr').collect do |row|
         | 
| 92 | 
            +
                        row.search('th', 'td').collect do |node|
         | 
| 93 | 
            +
                           node.inner_html.length
         | 
| 81 94 | 
             
                        end
         | 
| 95 | 
            +
                     end
         | 
| 82 96 |  | 
| 83 | 
            -
             | 
| 97 | 
            +
                     column_sizes.transpose.collect(&:max)
         | 
| 98 | 
            +
                  end
         | 
| 99 | 
            +
             | 
| 100 | 
            +
                  def add_table_header_underline(table, column_sizes)
         | 
| 101 | 
            +
                     table.search('./thead').each do |row|
         | 
| 102 | 
            +
                        header_bottom = "|#{ column_sizes.collect { |len| ('-' * (len + 2)) }.join('|') }|"
         | 
| 103 | 
            +
             | 
| 104 | 
            +
                        row.inner_html = "#{ row.inner_html }#{ header_bottom }\n"
         | 
| 84 105 | 
             
                     end
         | 
| 85 106 | 
             
                  end
         | 
| 86 107 |  |