orthotypo 0.5.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '05389ada5ba288061bd70f8e3170fb77b7d4d9df171b90e55d3c0c6edbf4d810'
4
- data.tar.gz: edd5731b28644f5515133994a89185f2da4fc5cffb7c12273f2d1f48985571be
3
+ metadata.gz: fd24cbdb2c5adaa4f6ecf91acf144025549b7599aeb0f944e3f1987331448450
4
+ data.tar.gz: 6d9a7bb478c4cc0b76722128a1065fc99666f7d1f284cc82cb6f26acfe89bf70
5
5
  SHA512:
6
- metadata.gz: 32280cc5abbc8b3d9a728db9943f815821c84c97384f59f6be2f0ba12789e2ef204a8c72ec579a2544cea505b9c2f3d46c51aef5db7006ac74e3e8d568813a85
7
- data.tar.gz: b81b849cf147939f7ad2e6fb16ccbc3af8eed2016efa67a288fcc9fc2c7cbd3d9d2d4b78ad4d70588fd75dade26830e4443627024838404f01e4010cbb5f2989
6
+ metadata.gz: e51b243378c8d97df6fd3bfa7f59a7aeb7f516d3f54d82cce4eb0ae8d3c7b6a5893211c857c72f8c249a73e0333f3d187ddf4d6145a45ab6de5039633aeaf457
7
+ data.tar.gz: dc77c1043a6210122c48a8d5849d63df04247d3d957707469d9e3029ed0cf1a9bec745cd12115bb4563254abc6e227c74e13f02285fc0cada9a0d75ab0be2ad6
data/Gemfile.lock CHANGED
@@ -1,8 +1,9 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- orthotypo (0.5.1)
4
+ orthotypo (1.0.0)
5
5
  htmlentities
6
+ nokogiri
6
7
 
7
8
  GEM
8
9
  remote: https://rubygems.org/
@@ -14,6 +15,10 @@ GEM
14
15
  htmlentities (4.3.4)
15
16
  json (2.6.3)
16
17
  language_server-protocol (3.17.0.3)
18
+ nokogiri (1.15.4-arm64-darwin)
19
+ racc (~> 1.4)
20
+ nokogiri (1.15.4-x86_64-darwin)
21
+ racc (~> 1.4)
17
22
  parallel (1.23.0)
18
23
  parser (3.2.2.4)
19
24
  ast (~> 2.4.1)
data/README.md CHANGED
@@ -28,10 +28,10 @@ Ajout d'espace fine insécable avant les signes doubles en français.
28
28
  ## Roadmap
29
29
 
30
30
  ### v1
31
- 1. S'adapter aux locales (détecter I18n)
32
- 2. Ne pas endommager l'HTML et les HTML entities ( )
31
+ Ne pas endommager l'HTML et les HTML entities ( )
33
32
 
34
33
  ### v2
34
+ S'adapter aux locales (détecter I18n)
35
35
  Permettre les configs
36
36
 
37
37
  ## Sources
@@ -0,0 +1,18 @@
1
module Orthotypo
  # Recognizes text fragments (web URLs and email addresses) that the
  # typographic rules must leave untouched.
  class Analyzer
    # Narrows URLs down to http(s) addresses with a dotted host name, an
    # optional port and an optional path.
    WEB_URL = /\A(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(:[0-9]{1,5})?(\/.*)?\z/ix

    # Tells whether the whole string is an http(s) URL.
    #
    # @param string [String, nil] candidate fragment
    # @return [Boolean] true only when the string is both a syntactically
    #   valid absolute URI and matches WEB_URL
    def self.url?(string)
      absolute_uri.match?(string) && WEB_URL.match?(string)
    end

    # Tells whether the whole string is an email address.
    #
    # @param string [String, nil] candidate fragment
    # @return [Boolean]
    def self.email?(string)
      # EMAIL_REGEXP is already anchored on both ends of the string.
      URI::MailTo::EMAIL_REGEXP.match?(string)
    end

    # Tells whether the string must be protected from typographic fixes.
    #
    # @param string [String, nil] candidate fragment
    # @return [Boolean] true for an email address or a URL
    def self.precious?(string)
      email?(string) || url?(string)
    end

    # Anchored absolute-URI pattern, built once and memoized.
    #
    # The RFC 2396 parser is used explicitly: only that parser exposes an
    # :ABS_URI entry. When URI::DEFAULT_PARSER is the RFC 3986 parser, the
    # lookup returns nil and the resulting pattern matches nothing but the
    # empty string.
    def self.absolute_uri
      @absolute_uri ||= /\A#{URI::RFC2396_Parser.new.regexp[:ABS_URI]}\z/
    end
    private_class_method :absolute_uri
  end
end
@@ -5,6 +5,7 @@ module Orthotypo
5
5
  SPACE = ' '.freeze
6
6
  NBSP = ' '.freeze
7
7
  NNBSP = ' '.freeze
8
+ PRECIOUS_TOKEN = 'orthotypopreciousthing'
8
9
 
9
10
  def initialize(string, html: nil)
10
11
  @string = string
@@ -18,6 +19,12 @@ module Orthotypo
18
19
  []
19
20
  end
20
21
 
22
# Characters that take a narrow no-break space before them when they
# directly follow a digit.
def chars_with_space_before_after_digit
  %w[%]
end
27
+
21
28
  def chars_with_space_after
22
29
  [
23
30
  ',',
@@ -41,6 +48,22 @@ module Orthotypo
41
48
  []
42
49
  end
43
50
 
51
# Characters listed as taking no surrounding space between two digits.
def chars_with_no_space_around_between_digits
  %w[/ :]
end
57
+
58
# Separators that may appear inside a number, a date or a time.
def chars_in_numbers
  %w[. , / :]
end
66
+
44
67
  def is_html?
45
68
  # TODO contains tags?
46
69
  @html || contains_html_entities?
@@ -52,17 +75,21 @@ module Orthotypo
52
75
 
53
76
  def prepare_ortho
54
77
  @ortho = string.dup
55
- @ortho = html_entities.decode(@ortho) if contains_html_entities?
78
+ # @ortho = html_entities.decode(@ortho) if contains_html_entities?
79
+ @nokogiri = Nokogiri::HTML.fragment @ortho
56
80
  end
57
81
 
58
82
  def clean_ortho
59
- @ortho = html_entities.encode(@ortho) if contains_html_entities?
83
+ @ortho = @nokogiri.to_s
84
+ # @ortho = html_entities.encode(@ortho) if contains_html_entities?
60
85
  end
61
86
 
62
87
  def parse
63
88
  prepare_ortho
89
+ preserve_precious_things
64
90
  # Chars
65
91
  parse_chars_with_space_before
92
+ parse_chars_with_space_before_after_digit
66
93
  parse_chars_with_space_after
67
94
  parse_chars_with_space_around
68
95
  parse_chars_with_no_space_around
@@ -70,17 +97,47 @@ module Orthotypo
70
97
  parse_pairs_with_space_around
71
98
  parse_pairs_with_no_space_around
72
99
  # Numbers
73
- parse_numbers
100
+ parse_chars_in_numbers
74
101
  #
75
102
  clean_ortho
103
+ restore_precious_things
104
+ end
105
+
106
# Swaps every URL or email address found in the text nodes for a numbered
# placeholder (PRECIOUS_TOKEN followed by its index in @precious_things),
# so that later substitutions cannot alter it.
#
# The substitution works in place on runs of non-whitespace characters.
# Splitting on whitespace and re-joining would drop the leading and trailing
# whitespace of each text node and turn tabs and newlines into single
# spaces.
def preserve_precious_things
  @precious_things = []
  @nokogiri.traverse do |node|
    next unless node.text?

    original = node.content
    protected_content = original.gsub(/\S+/) do |fragment|
      next fragment unless Analyzer.precious?(fragment)

      @precious_things << fragment
      "#{PRECIOUS_TOKEN}#{@precious_things.length - 1}"
    end
    # Leave nodes without any precious fragment untouched.
    node.content = protected_content unless protected_content == original
  end
end
122
+
123
# Puts the protected fragments back in place of their placeholders in
# @ortho.
#
# All placeholders are resolved in a single pass keyed on the complete
# number. Replacing them one index after another would let placeholder 1
# match the beginning of placeholders 10 to 19, corrupting any text holding
# more than ten protected fragments.
#
# @return [Array<String>] the list of protected fragments
def restore_precious_things
  placeholder = /#{Regexp.escape(PRECIOUS_TOKEN)}(\d+)/
  @ortho.gsub!(placeholder) do |token|
    # A number with no stored fragment is not a placeholder: keep the text.
    @precious_things.fetch(Regexp.last_match(1).to_i, token)
  end
  @precious_things
end
77
128
 
78
129
  def parse_chars_with_space_before
79
130
  chars_with_space_before.each do |char|
80
131
  # Espace normal avant -> espace fine insécable avant
81
- fix(SPACE + '%', NNBSP + '%')
132
+ fix(SPACE + char, NNBSP + char)
82
133
  # Pas d'espace avant -> espace fine insécable avant
83
- fix(/([[:alnum:]])%/, "\\1" + NNBSP + '%')
134
+ fix(/([[:alpha:]])[#{char}]/, "\\1" + NNBSP + char)
135
+ end
136
+ end
137
+
138
# Inserts a narrow no-break space between a digit and each configured
# character that immediately follows it.
def parse_chars_with_space_before_after_digit
  chars_with_space_before_after_digit.each do |sign|
    glued_to_digit = /([[:digit:]])[#{sign}]/
    fix(glued_to_digit, "\\1#{NNBSP}#{sign}")
  end
end
86
143
 
@@ -89,7 +146,7 @@ module Orthotypo
89
146
  # Espace avant -> pas d'espace avant
90
147
  fix(SPACE + char, char)
91
148
  # Pas d'espace après -> espace après
92
- fix(/[#{char}]([[:alnum:]])/, char + SPACE + "\\1")
149
+ fix(/[#{char}]([[:alpha:]])/, char + SPACE + "\\1")
93
150
  end
94
151
  end
95
152
 
@@ -98,7 +155,7 @@ module Orthotypo
98
155
  # Espace normal avant -> espace fine insécable avant
99
156
  fix(SPACE + char, NNBSP + char)
100
157
  # Pas d'espace avant -> espace fine insécable avant
101
- fix(/([[:alnum:]])[#{char}]/, "\\1" + NNBSP + char)
158
+ fix(/([[:alpha:]])[#{char}]/, "\\1" + NNBSP + char)
102
159
  end
103
160
  end
104
161
 
@@ -134,8 +191,10 @@ module Orthotypo
134
191
  end
135
192
  end
136
193
 
137
- def parse_numbers
138
- ['.', ','].each do |char|
194
+ def parse_chars_in_numbers
195
+ chars_in_numbers.each do |char|
196
+ fix(/([[:digit:]])[[:space:]][#{char}]([[:digit:]])/, "\\1" + char + "\\2")
197
+ fix(/([[:digit:]])[[:space:]][#{char}][[:space:]]([[:digit:]])/, "\\1" + char + "\\2")
139
198
  fix(/([[:digit:]])[#{char}][[:space:]]([[:digit:]])/, "\\1" + char + "\\2")
140
199
  end
141
200
  end
@@ -145,7 +204,10 @@ module Orthotypo
145
204
  end
146
205
 
147
206
  def fix(bad, good)
148
- @ortho.gsub! bad, good
207
+ @nokogiri.traverse do |node|
208
+ next unless node.text?
209
+ node.content = node.content.gsub(bad, good)
210
+ end
149
211
  end
150
212
  end
151
213
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Orthotypo
4
- VERSION = "0.5.1"
4
+ VERSION = "1.0.0"
5
5
  end
data/lib/orthotypo.rb CHANGED
@@ -1,6 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "htmlentities"
4
+ require "nokogiri"
5
+ require "uri"
6
+ require_relative "orthotypo/analyzer"
4
7
  require_relative "orthotypo/composer"
5
8
  require_relative "orthotypo/composer/fr"
6
9
  require_relative "orthotypo/composer/fr_fr"
data/orthotypo.gemspec CHANGED
@@ -21,6 +21,7 @@ Gem::Specification.new do |spec|
21
21
  spec.require_paths = "lib"
22
22
 
23
23
  spec.add_dependency "htmlentities"
24
+ spec.add_dependency "nokogiri"
24
25
 
25
26
  spec.add_development_dependency "rspec"
26
27
  spec.add_development_dependency "rspec-nc"
@@ -0,0 +1,8 @@
1
+ require 'spec_helper'
2
+
3
describe Orthotypo::Analyzer do
  # A complete http(s) address is a URL; a word followed by a colon is not.
  it 'find urls' do
    expect(described_class.url?("https://unsplash.com/@lusvardi?utm_source=osuny")).to be(true)
    expect(described_class.url?("mot:")).to be(false)
  end
end
@@ -5,7 +5,8 @@ describe Orthotypo::Composer::Fr do
5
5
  it 'adds spaces before double punctuation marks' do
6
6
  expect("mot: suite".ortho).to(eq("mot : suite"))
7
7
  expect("é: suite".ortho).to(eq("é : suite"))
8
- expect("1: suite".ortho).to(eq("1 : suite"))
8
+ # Pas automatisable, parce que 11:20
9
+ # expect("1: suite".ortho).to(eq("1 : suite"))
9
10
  expect("mot; suite".ortho).to(eq("mot ; suite"))
10
11
  expect("mot!".ortho).to(eq("mot !"))
11
12
  expect("mot !".ortho).to(eq("mot !"))
@@ -44,6 +45,14 @@ describe Orthotypo::Composer::Fr do
44
45
  expect("10 %".ortho).to(eq("10 %"))
45
46
  end
46
47
 
48
# Dates and times keep (or regain) their compact form: no space around
# the separators between digits.
it 'fixes dates/time' do
  {
    "10/01/2023 16:00" => "10/01/2023 16:00",
    "10/01/2023 16:00:00" => "10/01/2023 16:00:00",
    "10 / 01 / 2023 16:00" => "10/01/2023 16:00",
    "10 / 01 / 2023 16 : 00" => "10/01/2023 16:00",
    "10 octobre 2023 16:00" => "10 octobre 2023 16:00"
  }.each do |input, expected|
    expect(input.ortho).to(eq(expected))
  end
end
55
+
47
56
  # https://www.scribbr.fr/elements-linguistiques/les-espaces/
48
57
  it 'tests de Justine Debret' do
49
58
  expect("Elle a vu son cousin,sa tante et son oncle.Ils allaient tous très bien.".ortho).to(eq("Elle a vu son cousin, sa tante et son oncle. Ils allaient tous très bien."))
@@ -53,6 +62,7 @@ describe Orthotypo::Composer::Fr do
53
62
  expect("Il a dit : «J’arrive ce matin ( ou plus tard ) à Paris [ rue de la République ] pour son anniversaire.»".ortho).to(eq("Il a dit : « J’arrive ce matin (ou plus tard) à Paris [rue de la République] pour son anniversaire. »"))
54
63
  # Le test suivant n'est pas automatisable, parce qu'on ne peut distinguer un Paris-Brest (le gâteau) d'un Paris - Brest (le trajet)
55
64
  # expect("Nous l’avons rencontré à Saint - Martin.".ortho).to(eq("Nous l’avons rencontré à Saint-Martin."))
65
+ expect("Il roule pendant 31, 5 km.".ortho).to(eq("Il roule pendant 31,5 km."))
56
66
  # Le test suivant est-il automatisable ?
57
67
  # expect("Il roule pendant 31, 5km.".ortho).to(eq("Il roule pendant 31,5 km."))
58
68
  # Pas automatisable, rien ne permet de distinguer s'il s'agit d'un rang ou d'un nombre
@@ -8,4 +8,14 @@ describe Orthotypo do
8
8
  it 'leaves html tags untouched' do
9
9
  expect("<b>bold</b>".ortho).to eq "<b>bold</b>"
10
10
  end
11
+
12
# A URL must come out unchanged, both as bare text and as a link whose
# href and label are that URL.
it 'leaves URLs untouched' do
  url = "https://unsplash.com/@lusvardi?utm_source=osuny"
  link = "<a href=\"https://unsplash.com/@lusvardi?utm_source=osuny\">https://unsplash.com/@lusvardi?utm_source=osuny</a>"

  expect(url.ortho).to eq url
  expect(link.ortho).to eq link
end
16
+
17
# An email address must come out unchanged, both as bare text and inside
# a mailto link. The description names emails so that it no longer
# duplicates the URL example's description in test reports.
it 'leaves emails untouched' do
  expect("prenom.nom@example.com".ortho).to eq "prenom.nom@example.com"
  expect("<a href=\"mailto:prenom.nom@example.com\">prenom.nom@example.com</a>".ortho).to eq "<a href=\"mailto:prenom.nom@example.com\">prenom.nom@example.com</a>"
end
11
21
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: orthotypo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arnaud Levy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-10-16 00:00:00.000000000 Z
11
+ date: 2023-10-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: rspec
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -87,6 +101,7 @@ files:
87
101
  - bin/console
88
102
  - bin/setup
89
103
  - lib/orthotypo.rb
104
+ - lib/orthotypo/analyzer.rb
90
105
  - lib/orthotypo/composer.rb
91
106
  - lib/orthotypo/composer/en.rb
92
107
  - lib/orthotypo/composer/en_gb.rb
@@ -98,6 +113,7 @@ files:
98
113
  - lib/orthotypo/version.rb
99
114
  - orthotypo.gemspec
100
115
  - sig/orthotypo.rbs
116
+ - spec/analyzer_spec.rb
101
117
  - spec/composer/en_spec.rb
102
118
  - spec/composer/fr_spec.rb
103
119
  - spec/localizer_spec.rb