orthotypo 0.6.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +12 -14
- data/README.md +17 -2
- data/lib/orthotypo/composer.rb +69 -20
- data/lib/orthotypo/version.rb +1 -1
- data/spec/composer/fr_spec.rb +15 -1
- metadata +3 -3
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 7d98147f241b463ded341c2d422bc85402af278c1dba5c8120fe25de1335addb
         | 
| 4 | 
            +
              data.tar.gz: aada22cc0374ebd5fdeea55d1d8a0b4ce84734e965f303dec5e34be9edfe82ac
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: b8a9e8e9d6ecd50bd27eb6309836508d23ed182437487dbf7f1d0130de29705aa2ef81fe7ae516dc976352a8f70d3c05146df1d74432fee615b1d47443118306
         | 
| 7 | 
            +
              data.tar.gz: bf718bc6f49c667e203b529fad9a565c8f936ff74bd9cd54b56f60f32bd50c72e39f3a838beee66a315a53c6c1dcb13ff93266111ff1f569bbaaa69025923a38
         | 
    
        data/Gemfile.lock
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            PATH
         | 
| 2 2 | 
             
              remote: .
         | 
| 3 3 | 
             
              specs:
         | 
| 4 | 
            -
                orthotypo (0.6.0) | 
| 4 | 
            +
                orthotypo (1.0.1)
         | 
| 5 5 | 
             
                  htmlentities
         | 
| 6 6 | 
             
                  nokogiri
         | 
| 7 7 |  | 
| @@ -9,24 +9,23 @@ GEM | |
| 9 9 | 
             
              remote: https://rubygems.org/
         | 
| 10 10 | 
             
              specs:
         | 
| 11 11 | 
             
                ast (2.4.2)
         | 
| 12 | 
            -
                base64 (0.1.1)
         | 
| 13 12 | 
             
                byebug (11.1.3)
         | 
| 14 13 | 
             
                diff-lcs (1.5.0)
         | 
| 15 14 | 
             
                htmlentities (4.3.4)
         | 
| 16 | 
            -
                json (2. | 
| 15 | 
            +
                json (2.7.1)
         | 
| 17 16 | 
             
                language_server-protocol (3.17.0.3)
         | 
| 18 | 
            -
                nokogiri (1. | 
| 17 | 
            +
                nokogiri (1.16.0-arm64-darwin)
         | 
| 19 18 | 
             
                  racc (~> 1.4)
         | 
| 20 | 
            -
                nokogiri (1. | 
| 19 | 
            +
                nokogiri (1.16.0-x86_64-darwin)
         | 
| 21 20 | 
             
                  racc (~> 1.4)
         | 
| 22 | 
            -
                parallel (1. | 
| 23 | 
            -
                parser (3. | 
| 21 | 
            +
                parallel (1.24.0)
         | 
| 22 | 
            +
                parser (3.3.0.2)
         | 
| 24 23 | 
             
                  ast (~> 2.4.1)
         | 
| 25 24 | 
             
                  racc
         | 
| 26 | 
            -
                racc (1.7. | 
| 25 | 
            +
                racc (1.7.3)
         | 
| 27 26 | 
             
                rainbow (3.1.1)
         | 
| 28 | 
            -
                rake (13.0 | 
| 29 | 
            -
                regexp_parser (2. | 
| 27 | 
            +
                rake (13.1.0)
         | 
| 28 | 
            +
                regexp_parser (2.9.0)
         | 
| 30 29 | 
             
                rexml (3.2.6)
         | 
| 31 30 | 
             
                rspec (3.12.0)
         | 
| 32 31 | 
             
                  rspec-core (~> 3.12.0)
         | 
| @@ -44,8 +43,7 @@ GEM | |
| 44 43 | 
             
                  rspec (>= 3)
         | 
| 45 44 | 
             
                  terminal-notifier (>= 1.4)
         | 
| 46 45 | 
             
                rspec-support (3.12.1)
         | 
| 47 | 
            -
                rubocop (1. | 
| 48 | 
            -
                  base64 (~> 0.1.1)
         | 
| 46 | 
            +
                rubocop (1.59.0)
         | 
| 49 47 | 
             
                  json (~> 2.3)
         | 
| 50 48 | 
             
                  language_server-protocol (>= 3.17.0)
         | 
| 51 49 | 
             
                  parallel (~> 1.10)
         | 
| @@ -53,10 +51,10 @@ GEM | |
| 53 51 | 
             
                  rainbow (>= 2.2.2, < 4.0)
         | 
| 54 52 | 
             
                  regexp_parser (>= 1.8, < 3.0)
         | 
| 55 53 | 
             
                  rexml (>= 3.2.5, < 4.0)
         | 
| 56 | 
            -
                  rubocop-ast (>= 1. | 
| 54 | 
            +
                  rubocop-ast (>= 1.30.0, < 2.0)
         | 
| 57 55 | 
             
                  ruby-progressbar (~> 1.7)
         | 
| 58 56 | 
             
                  unicode-display_width (>= 2.4.0, < 3.0)
         | 
| 59 | 
            -
                rubocop-ast (1. | 
| 57 | 
            +
                rubocop-ast (1.30.0)
         | 
| 60 58 | 
             
                  parser (>= 3.2.1.0)
         | 
| 61 59 | 
             
                ruby-progressbar (1.13.0)
         | 
| 62 60 | 
             
                terminal-notifier (2.0.0)
         | 
    
        data/README.md
    CHANGED
    
    | @@ -28,12 +28,27 @@ Ajout d'espace fine insécable avant les signes doubles en français. | |
| 28 28 | 
             
            ## Roadmap
         | 
| 29 29 |  | 
| 30 30 | 
             
            ### v1
         | 
| 31 | 
            -
             | 
| 32 | 
            -
            2. Ne pas endommager l'HTML et les HTML entities ( )
         | 
| 31 | 
            +
            Ne pas endommager l'HTML et les HTML entities ( )
         | 
| 33 32 |  | 
| 34 33 | 
             
            ### v2
         | 
| 34 | 
            +
            S'adapter aux locales (détecter I18n)
         | 
| 35 35 | 
             
            Permettre les configs
         | 
| 36 36 |  | 
| 37 | 
            +
            ## Tests
         | 
| 38 | 
            +
             | 
| 39 | 
            +
            ```
         | 
| 40 | 
            +
            rake
         | 
| 41 | 
            +
            ```
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            ```
         | 
| 44 | 
            +
            bundle exec rspec
         | 
| 45 | 
            +
            ```
         | 
| 46 | 
            +
             | 
| 47 | 
            +
            Pour jouer un seul test : 
         | 
| 48 | 
            +
            ```
         | 
| 49 | 
            +
            bundle exec rspec ./spec/composer/fr_spec.rb:56
         | 
| 50 | 
            +
            ```
         | 
| 51 | 
            +
             | 
| 37 52 | 
             
            ## Sources
         | 
| 38 53 |  | 
| 39 54 | 
             
            - https://fr.wikipedia.org/wiki/Code_typographique
         | 
    
        data/lib/orthotypo/composer.rb
    CHANGED
    
    | @@ -19,6 +19,12 @@ module Orthotypo | |
| 19 19 | 
             
                  []
         | 
| 20 20 | 
             
                end
         | 
| 21 21 |  | 
| 22 | 
            +
                def chars_with_space_before_after_digit
         | 
| 23 | 
            +
                  [
         | 
| 24 | 
            +
                    '%'
         | 
| 25 | 
            +
                  ]
         | 
| 26 | 
            +
                end
         | 
| 27 | 
            +
             | 
| 22 28 | 
             
                def chars_with_space_after
         | 
| 23 29 | 
             
                  [
         | 
| 24 30 | 
             
                    ',',
         | 
| @@ -42,6 +48,22 @@ module Orthotypo | |
| 42 48 | 
             
                  []
         | 
| 43 49 | 
             
                end
         | 
| 44 50 |  | 
| 51 | 
            +
                def chars_with_no_space_around_between_digits
         | 
| 52 | 
            +
                  [
         | 
| 53 | 
            +
                    '/',
         | 
| 54 | 
            +
                    ':'
         | 
| 55 | 
            +
                  ]
         | 
| 56 | 
            +
                end
         | 
| 57 | 
            +
             | 
| 58 | 
            +
                def chars_in_numbers
         | 
| 59 | 
            +
                  [
         | 
| 60 | 
            +
                    '.', 
         | 
| 61 | 
            +
                    ',',
         | 
| 62 | 
            +
                    '/',
         | 
| 63 | 
            +
                    ':'
         | 
| 64 | 
            +
                  ]
         | 
| 65 | 
            +
                end
         | 
| 66 | 
            +
             | 
| 45 67 | 
             
                def is_html?
         | 
| 46 68 | 
             
                  # TODO contains tags?
         | 
| 47 69 | 
             
                  @html || contains_html_entities?
         | 
| @@ -53,13 +75,11 @@ module Orthotypo | |
| 53 75 |  | 
| 54 76 | 
             
                def prepare_ortho
         | 
| 55 77 | 
             
                  @ortho = string.dup
         | 
| 56 | 
            -
                  # @ortho = html_entities.decode(@ortho) if contains_html_entities?
         | 
| 57 78 | 
             
                  @nokogiri = Nokogiri::HTML.fragment @ortho
         | 
| 58 79 | 
             
                end
         | 
| 59 80 |  | 
| 60 81 | 
             
                def clean_ortho
         | 
| 61 82 | 
             
                  @ortho = @nokogiri.to_s
         | 
| 62 | 
            -
                  # @ortho = html_entities.encode(@ortho) if contains_html_entities?
         | 
| 63 83 | 
             
                end
         | 
| 64 84 |  | 
| 65 85 | 
             
                def parse
         | 
| @@ -67,6 +87,7 @@ module Orthotypo | |
| 67 87 | 
             
                  preserve_precious_things
         | 
| 68 88 | 
             
                  # Chars
         | 
| 69 89 | 
             
                  parse_chars_with_space_before
         | 
| 90 | 
            +
                  parse_chars_with_space_before_after_digit
         | 
| 70 91 | 
             
                  parse_chars_with_space_after
         | 
| 71 92 | 
             
                  parse_chars_with_space_around
         | 
| 72 93 | 
             
                  parse_chars_with_no_space_around
         | 
| @@ -74,8 +95,8 @@ module Orthotypo | |
| 74 95 | 
             
                  parse_pairs_with_space_around
         | 
| 75 96 | 
             
                  parse_pairs_with_no_space_around
         | 
| 76 97 | 
             
                  # Numbers
         | 
| 77 | 
            -
                   | 
| 78 | 
            -
                  # | 
| 98 | 
            +
                  parse_chars_in_numbers
         | 
| 99 | 
            +
                  #
         | 
| 79 100 | 
             
                  clean_ortho
         | 
| 80 101 | 
             
                  restore_precious_things
         | 
| 81 102 | 
             
                end
         | 
| @@ -83,20 +104,40 @@ module Orthotypo | |
| 83 104 | 
             
                def preserve_precious_things
         | 
| 84 105 | 
             
                  @precious_things = []
         | 
| 85 106 | 
             
                  @nokogiri.traverse do |node|
         | 
| 86 | 
            -
                     | 
| 87 | 
            -
             | 
| 88 | 
            -
                       | 
| 89 | 
            -
             | 
| 90 | 
            -
                         | 
| 91 | 
            -
             | 
| 92 | 
            -
                       | 
| 93 | 
            -
             | 
| 107 | 
            +
                    if node.text?
         | 
| 108 | 
            +
                      has_leading_space = node.content.start_with? SPACE
         | 
| 109 | 
            +
                      has_trailing_space = node.content.end_with? SPACE
         | 
| 110 | 
            +
                      node.content = node.content.split(SPACE).map { |fragment|
         | 
| 111 | 
            +
                        store_if_precious(fragment)  
         | 
| 112 | 
            +
                      }.join(SPACE)
         | 
| 113 | 
            +
                      node.content = SPACE + node.content if has_leading_space
         | 
| 114 | 
            +
                      node.content = node.content + SPACE if has_trailing_space
         | 
| 115 | 
            +
                    elsif node.element?
         | 
| 116 | 
            +
                      if node.name == 'a'
         | 
| 117 | 
            +
                        node.attributes.each do |key, attribute|
         | 
| 118 | 
            +
                          if attribute.name == 'href'
         | 
| 119 | 
            +
                            attribute.value = store_precious_thing(attribute.value)
         | 
| 120 | 
            +
                          end
         | 
| 121 | 
            +
                        end
         | 
| 94 122 | 
             
                      end
         | 
| 95 | 
            -
                     | 
| 96 | 
            -
                    node.content = new_content
         | 
| 123 | 
            +
                    end
         | 
| 97 124 | 
             
                  end
         | 
| 98 125 | 
             
                end
         | 
| 99 126 |  | 
| 127 | 
            +
                def store_if_precious(string)
         | 
| 128 | 
            +
                  Analyzer::precious?(string) ? store_precious_thing(string)
         | 
| 129 | 
            +
                                              : string
         | 
| 130 | 
            +
                end
         | 
| 131 | 
            +
             | 
| 132 | 
            +
                def store_precious_thing(string)
         | 
| 133 | 
            +
                  # Create token identifier
         | 
| 134 | 
            +
                  token = "#{PRECIOUS_TOKEN}#{@precious_things.length}"
         | 
| 135 | 
            +
                  # Store value
         | 
| 136 | 
            +
                  @precious_things << string
         | 
| 137 | 
            +
                  # Return identifier
         | 
| 138 | 
            +
                  token
         | 
| 139 | 
            +
                end
         | 
| 140 | 
            +
             | 
| 100 141 | 
             
                def restore_precious_things
         | 
| 101 142 | 
             
                  @precious_things.each_with_index do |value, index|
         | 
| 102 143 | 
             
                    @ortho.gsub! "#{PRECIOUS_TOKEN}#{index}", value
         | 
| @@ -106,9 +147,15 @@ module Orthotypo | |
| 106 147 | 
             
                def parse_chars_with_space_before
         | 
| 107 148 | 
             
                  chars_with_space_before.each do |char|
         | 
| 108 149 | 
             
                    # Espace normal avant -> espace fine insécable avant
         | 
| 109 | 
            -
                    fix(SPACE +  | 
| 150 | 
            +
                    fix(SPACE + char, NNBSP + char)
         | 
| 110 151 | 
             
                    # Pas d'espace avant -> espace fine insécable avant
         | 
| 111 | 
            -
                    fix(/([[: | 
| 152 | 
            +
                    fix(/([[:alpha:]])[#{char}]/, "\\1" + NNBSP + char)
         | 
| 153 | 
            +
                  end
         | 
| 154 | 
            +
                end
         | 
| 155 | 
            +
             | 
| 156 | 
            +
                def parse_chars_with_space_before_after_digit
         | 
| 157 | 
            +
                  chars_with_space_before_after_digit.each do |char|
         | 
| 158 | 
            +
                    fix(/([[:digit:]])[#{char}]/, "\\1" + NNBSP + char)
         | 
| 112 159 | 
             
                  end
         | 
| 113 160 | 
             
                end
         | 
| 114 161 |  | 
| @@ -117,7 +164,7 @@ module Orthotypo | |
| 117 164 | 
             
                    # Espace avant -> pas d'espace avant 
         | 
| 118 165 | 
             
                    fix(SPACE + char, char)
         | 
| 119 166 | 
             
                    # Pas d'espace après -> espace après
         | 
| 120 | 
            -
                    fix(/[#{char}]([[: | 
| 167 | 
            +
                    fix(/[#{char}]([[:alpha:]])/, char + SPACE + "\\1")
         | 
| 121 168 | 
             
                  end
         | 
| 122 169 | 
             
                end
         | 
| 123 170 |  | 
| @@ -126,7 +173,7 @@ module Orthotypo | |
| 126 173 | 
             
                    # Espace normal avant -> espace fine insécable avant
         | 
| 127 174 | 
             
                    fix(SPACE + char, NNBSP + char)
         | 
| 128 175 | 
             
                    # Pas d'espace avant -> espace fine insécable avant
         | 
| 129 | 
            -
                    fix(/([[: | 
| 176 | 
            +
                    fix(/([[:alpha:]])[#{char}]/, "\\1" + NNBSP + char)
         | 
| 130 177 | 
             
                  end
         | 
| 131 178 | 
             
                end
         | 
| 132 179 |  | 
| @@ -162,8 +209,10 @@ module Orthotypo | |
| 162 209 | 
             
                  end
         | 
| 163 210 | 
             
                end
         | 
| 164 211 |  | 
| 165 | 
            -
                def  | 
| 166 | 
            -
                   | 
| 212 | 
            +
                def parse_chars_in_numbers
         | 
| 213 | 
            +
                  chars_in_numbers.each do |char|
         | 
| 214 | 
            +
                    fix(/([[:digit:]])[[:space:]][#{char}]([[:digit:]])/, "\\1" + char + "\\2")
         | 
| 215 | 
            +
                    fix(/([[:digit:]])[[:space:]][#{char}][[:space:]]([[:digit:]])/, "\\1" + char + "\\2")
         | 
| 167 216 | 
             
                    fix(/([[:digit:]])[#{char}][[:space:]]([[:digit:]])/, "\\1" + char + "\\2")
         | 
| 168 217 | 
             
                  end
         | 
| 169 218 | 
             
                end
         | 
    
        data/lib/orthotypo/version.rb
    CHANGED
    
    
    
        data/spec/composer/fr_spec.rb
    CHANGED
    
    | @@ -5,7 +5,8 @@ describe Orthotypo::Composer::Fr do | |
| 5 5 | 
             
              it 'adds spaces before double punctuation marks' do
         | 
| 6 6 | 
             
                expect("mot: suite".ortho).to(eq("mot : suite"))
         | 
| 7 7 | 
             
                expect("é: suite".ortho).to(eq("é : suite"))
         | 
| 8 | 
            -
                 | 
| 8 | 
            +
                # Pas automatisable, parce que 11:20
         | 
| 9 | 
            +
                # expect("1: suite".ortho).to(eq("1 : suite"))
         | 
| 9 10 | 
             
                expect("mot; suite".ortho).to(eq("mot ; suite"))
         | 
| 10 11 | 
             
                expect("mot!".ortho).to(eq("mot !"))
         | 
| 11 12 | 
             
                expect("mot !".ortho).to(eq("mot !"))
         | 
| @@ -44,6 +45,18 @@ describe Orthotypo::Composer::Fr do | |
| 44 45 | 
             
                expect("10 %".ortho).to(eq("10 %"))
         | 
| 45 46 | 
             
              end
         | 
| 46 47 |  | 
| 48 | 
            +
              it 'fixes dates/time' do
         | 
| 49 | 
            +
                expect("10/01/2023 16:00".ortho).to(eq("10/01/2023 16:00"))
         | 
| 50 | 
            +
                expect("10/01/2023 16:00:00".ortho).to(eq("10/01/2023 16:00:00"))
         | 
| 51 | 
            +
                expect("10 / 01 / 2023 16:00".ortho).to(eq("10/01/2023 16:00"))
         | 
| 52 | 
            +
                expect("10 / 01 / 2023 16 : 00".ortho).to(eq("10/01/2023 16:00"))
         | 
| 53 | 
            +
                expect("10 octobre 2023 16:00".ortho).to(eq("10 octobre 2023 16:00"))
         | 
| 54 | 
            +
              end
         | 
| 55 | 
            +
             | 
| 56 | 
            +
              it 'does well with HTML' do
         | 
| 57 | 
            +
                expect("<p><a href=\"https://www.linkedin.com/in/marie-dewet-1397a094/\">Marie Dewet</a>, Co-fondatrice de <a href=\"https://www.linkedin.com/company/maisoncleo/\">MaisonCléo</a> nous apporte ses lumières.</p>".ortho).to(eq("<p><a href=\"https://www.linkedin.com/in/marie-dewet-1397a094/\">Marie Dewet</a>, Co-fondatrice de <a href=\"https://www.linkedin.com/company/maisoncleo/\">MaisonCléo</a> nous apporte ses lumières.</p>"))
         | 
| 58 | 
            +
              end
         | 
| 59 | 
            +
             | 
| 47 60 | 
             
              # https://www.scribbr.fr/elements-linguistiques/les-espaces/
         | 
| 48 61 | 
             
              it 'tests de Justine Debret' do
         | 
| 49 62 | 
             
                expect("Elle a vu son cousin,sa tante et son oncle.Ils allaient tous très bien.".ortho).to(eq("Elle a vu son cousin, sa tante et son oncle. Ils allaient tous très bien."))
         | 
| @@ -53,6 +66,7 @@ describe Orthotypo::Composer::Fr do | |
| 53 66 | 
             
                expect("Il a dit : «J’arrive ce matin ( ou plus tard ) à Paris [ rue de la République ] pour son anniversaire.»".ortho).to(eq("Il a dit : « J’arrive ce matin (ou plus tard) à Paris [rue de la République] pour son anniversaire. »"))
         | 
| 54 67 | 
             
                # Le test suivant n'est pas automatisable, parce qu'on ne peut distinguer un Paris-Brest (le gâteau) d'un Paris - Brest (le trajet)
         | 
| 55 68 | 
             
                # expect("Nous l’avons rencontré à Saint - Martin.".ortho).to(eq("Nous l’avons rencontré à Saint-Martin."))
         | 
| 69 | 
            +
                expect("Il roule pendant 31, 5 km.".ortho).to(eq("Il roule pendant 31,5 km."))
         | 
| 56 70 | 
             
                # Le test suivant est-il automatisable ?
         | 
| 57 71 | 
             
                # expect("Il roule pendant 31, 5km.".ortho).to(eq("Il roule pendant 31,5 km."))
         | 
| 58 72 | 
             
                # Pas automatisable, rien ne permet de distinguer s'il s'agit d'un rang ou d'un nombre
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: orthotypo
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.6.0 | 
| 4 | 
            +
              version: 1.0.1
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Arnaud Levy
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date:  | 
| 11 | 
            +
            date: 2024-01-09 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: htmlentities
         | 
| @@ -140,7 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 140 140 | 
             
                - !ruby/object:Gem::Version
         | 
| 141 141 | 
             
                  version: '0'
         | 
| 142 142 | 
             
            requirements: []
         | 
| 143 | 
            -
            rubygems_version: 3.4. | 
| 143 | 
            +
            rubygems_version: 3.4.10
         | 
| 144 144 | 
             
            signing_key:
         | 
| 145 145 | 
             
            specification_version: 4
         | 
| 146 146 | 
             
            summary: Pour un texte correctement typographié
         |