orthotypo 0.5.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +6 -1
- data/README.md +2 -2
- data/lib/orthotypo/analyzer.rb +18 -0
- data/lib/orthotypo/composer.rb +72 -10
- data/lib/orthotypo/version.rb +1 -1
- data/lib/orthotypo.rb +3 -0
- data/orthotypo.gemspec +1 -0
- data/spec/analyzer_spec.rb +8 -0
- data/spec/composer/fr_spec.rb +11 -1
- data/spec/orthotypo_spec.rb +10 -0
- metadata +18 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fd24cbdb2c5adaa4f6ecf91acf144025549b7599aeb0f944e3f1987331448450
|
4
|
+
data.tar.gz: 6d9a7bb478c4cc0b76722128a1065fc99666f7d1f284cc82cb6f26acfe89bf70
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e51b243378c8d97df6fd3bfa7f59a7aeb7f516d3f54d82cce4eb0ae8d3c7b6a5893211c857c72f8c249a73e0333f3d187ddf4d6145a45ab6de5039633aeaf457
|
7
|
+
data.tar.gz: dc77c1043a6210122c48a8d5849d63df04247d3d957707469d9e3029ed0cf1a9bec745cd12115bb4563254abc6e227c74e13f02285fc0cada9a0d75ab0be2ad6
|
data/Gemfile.lock
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
orthotypo (0.5.1)
|
4
|
+
orthotypo (1.0.0)
|
5
5
|
htmlentities
|
6
|
+
nokogiri
|
6
7
|
|
7
8
|
GEM
|
8
9
|
remote: https://rubygems.org/
|
@@ -14,6 +15,10 @@ GEM
|
|
14
15
|
htmlentities (4.3.4)
|
15
16
|
json (2.6.3)
|
16
17
|
language_server-protocol (3.17.0.3)
|
18
|
+
nokogiri (1.15.4-arm64-darwin)
|
19
|
+
racc (~> 1.4)
|
20
|
+
nokogiri (1.15.4-x86_64-darwin)
|
21
|
+
racc (~> 1.4)
|
17
22
|
parallel (1.23.0)
|
18
23
|
parser (3.2.2.4)
|
19
24
|
ast (~> 2.4.1)
|
data/README.md
CHANGED
@@ -28,10 +28,10 @@ Ajout d'espace fine insécable avant les signes doubles en français.
|
|
28
28
|
## Roadmap
|
29
29
|
|
30
30
|
### v1
|
31
|
-
|
32
|
-
2. Ne pas endommager l'HTML et les HTML entities (&nbsp;)
|
31
|
+
Ne pas endommager l'HTML et les HTML entities (&nbsp;)
|
33
32
|
|
34
33
|
### v2
|
34
|
+
S'adapter aux locales (détecter I18n)
|
35
35
|
Permettre les configs
|
36
36
|
|
37
37
|
## Sources
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Orthotypo
|
2
|
+
class Analyzer
|
3
|
+
|
4
|
+
def self.url?(string)
|
5
|
+
(string =~ /\A#{URI::DEFAULT_PARSER.regexp[:ABS_URI]}\z/) &&
|
6
|
+
(string =~ /\A(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(:[0-9]{1,5})?(\/.*)?\z/ix) ? true : false
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.email?(string)
|
10
|
+
string =~ /\A#{URI::MailTo::EMAIL_REGEXP}\z/ ? true : false
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.precious?(string)
|
14
|
+
email?(string) || url?(string)
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
18
|
+
end
|
data/lib/orthotypo/composer.rb
CHANGED
@@ -5,6 +5,7 @@ module Orthotypo
|
|
5
5
|
SPACE = ' '.freeze
|
6
6
|
NBSP = ' '.freeze
|
7
7
|
NNBSP = ' '.freeze
|
8
|
+
PRECIOUS_TOKEN = 'orthotypopreciousthing'
|
8
9
|
|
9
10
|
def initialize(string, html: nil)
|
10
11
|
@string = string
|
@@ -18,6 +19,12 @@ module Orthotypo
|
|
18
19
|
[]
|
19
20
|
end
|
20
21
|
|
22
|
+
def chars_with_space_before_after_digit
|
23
|
+
[
|
24
|
+
'%'
|
25
|
+
]
|
26
|
+
end
|
27
|
+
|
21
28
|
def chars_with_space_after
|
22
29
|
[
|
23
30
|
',',
|
@@ -41,6 +48,22 @@ module Orthotypo
|
|
41
48
|
[]
|
42
49
|
end
|
43
50
|
|
51
|
+
def chars_with_no_space_around_between_digits
|
52
|
+
[
|
53
|
+
'/',
|
54
|
+
':'
|
55
|
+
]
|
56
|
+
end
|
57
|
+
|
58
|
+
def chars_in_numbers
|
59
|
+
[
|
60
|
+
'.',
|
61
|
+
',',
|
62
|
+
'/',
|
63
|
+
':'
|
64
|
+
]
|
65
|
+
end
|
66
|
+
|
44
67
|
def is_html?
|
45
68
|
# TODO contains tags?
|
46
69
|
@html || contains_html_entities?
|
@@ -52,17 +75,21 @@ module Orthotypo
|
|
52
75
|
|
53
76
|
def prepare_ortho
|
54
77
|
@ortho = string.dup
|
55
|
-
@ortho = html_entities.decode(@ortho) if contains_html_entities?
|
78
|
+
# @ortho = html_entities.decode(@ortho) if contains_html_entities?
|
79
|
+
@nokogiri = Nokogiri::HTML.fragment @ortho
|
56
80
|
end
|
57
81
|
|
58
82
|
def clean_ortho
|
59
|
-
@ortho =
|
83
|
+
@ortho = @nokogiri.to_s
|
84
|
+
# @ortho = html_entities.encode(@ortho) if contains_html_entities?
|
60
85
|
end
|
61
86
|
|
62
87
|
def parse
|
63
88
|
prepare_ortho
|
89
|
+
preserve_precious_things
|
64
90
|
# Chars
|
65
91
|
parse_chars_with_space_before
|
92
|
+
parse_chars_with_space_before_after_digit
|
66
93
|
parse_chars_with_space_after
|
67
94
|
parse_chars_with_space_around
|
68
95
|
parse_chars_with_no_space_around
|
@@ -70,17 +97,47 @@ module Orthotypo
|
|
70
97
|
parse_pairs_with_space_around
|
71
98
|
parse_pairs_with_no_space_around
|
72
99
|
# Numbers
|
73
|
-
|
100
|
+
parse_chars_in_numbers
|
74
101
|
#
|
75
102
|
clean_ortho
|
103
|
+
restore_precious_things
|
104
|
+
end
|
105
|
+
|
106
|
+
def preserve_precious_things
|
107
|
+
@precious_things = []
|
108
|
+
@nokogiri.traverse do |node|
|
109
|
+
next unless node.text?
|
110
|
+
new_content = node.content.split(SPACE).map { |fragment|
|
111
|
+
if Analyzer::precious?(fragment)
|
112
|
+
token = "#{PRECIOUS_TOKEN}#{@precious_things.length}"
|
113
|
+
@precious_things << fragment
|
114
|
+
token
|
115
|
+
else
|
116
|
+
fragment
|
117
|
+
end
|
118
|
+
}.join(SPACE)
|
119
|
+
node.content = new_content
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
def restore_precious_things
|
124
|
+
@precious_things.each_with_index do |value, index|
|
125
|
+
@ortho.gsub! "#{PRECIOUS_TOKEN}#{index}", value
|
126
|
+
end
|
76
127
|
end
|
77
128
|
|
78
129
|
def parse_chars_with_space_before
|
79
130
|
chars_with_space_before.each do |char|
|
80
131
|
# Espace normal avant -> espace fine insécable avant
|
81
|
-
fix(SPACE +
|
132
|
+
fix(SPACE + char, NNBSP + char)
|
82
133
|
# Pas d'espace avant -> espace fine insécable avant
|
83
|
-
fix(/([[:
|
134
|
+
fix(/([[:alpha:]])[#{char}]/, "\\1" + NNBSP + char)
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
def parse_chars_with_space_before_after_digit
|
139
|
+
chars_with_space_before_after_digit.each do |char|
|
140
|
+
fix(/([[:digit:]])[#{char}]/, "\\1" + NNBSP + char)
|
84
141
|
end
|
85
142
|
end
|
86
143
|
|
@@ -89,7 +146,7 @@ module Orthotypo
|
|
89
146
|
# Espace avant -> pas d'espace avant
|
90
147
|
fix(SPACE + char, char)
|
91
148
|
# Pas d'espace après -> espace après
|
92
|
-
fix(/[#{char}]([[:
|
149
|
+
fix(/[#{char}]([[:alpha:]])/, char + SPACE + "\\1")
|
93
150
|
end
|
94
151
|
end
|
95
152
|
|
@@ -98,7 +155,7 @@ module Orthotypo
|
|
98
155
|
# Espace normal avant -> espace fine insécable avant
|
99
156
|
fix(SPACE + char, NNBSP + char)
|
100
157
|
# Pas d'espace avant -> espace fine insécable avant
|
101
|
-
fix(/([[:
|
158
|
+
fix(/([[:alpha:]])[#{char}]/, "\\1" + NNBSP + char)
|
102
159
|
end
|
103
160
|
end
|
104
161
|
|
@@ -134,8 +191,10 @@ module Orthotypo
|
|
134
191
|
end
|
135
192
|
end
|
136
193
|
|
137
|
-
def
|
138
|
-
|
194
|
+
def parse_chars_in_numbers
|
195
|
+
chars_in_numbers.each do |char|
|
196
|
+
fix(/([[:digit:]])[[:space:]][#{char}]([[:digit:]])/, "\\1" + char + "\\2")
|
197
|
+
fix(/([[:digit:]])[[:space:]][#{char}][[:space:]]([[:digit:]])/, "\\1" + char + "\\2")
|
139
198
|
fix(/([[:digit:]])[#{char}][[:space:]]([[:digit:]])/, "\\1" + char + "\\2")
|
140
199
|
end
|
141
200
|
end
|
@@ -145,7 +204,10 @@ module Orthotypo
|
|
145
204
|
end
|
146
205
|
|
147
206
|
def fix(bad, good)
|
148
|
-
@
|
207
|
+
@nokogiri.traverse do |node|
|
208
|
+
next unless node.text?
|
209
|
+
node.content = node.content.gsub(bad, good)
|
210
|
+
end
|
149
211
|
end
|
150
212
|
end
|
151
213
|
end
|
data/lib/orthotypo/version.rb
CHANGED
data/lib/orthotypo.rb
CHANGED
data/orthotypo.gemspec
CHANGED
data/spec/composer/fr_spec.rb
CHANGED
@@ -5,7 +5,8 @@ describe Orthotypo::Composer::Fr do
|
|
5
5
|
it 'adds spaces before double punctuation marks' do
|
6
6
|
expect("mot: suite".ortho).to(eq("mot : suite"))
|
7
7
|
expect("é: suite".ortho).to(eq("é : suite"))
|
8
|
-
|
8
|
+
# Pas automatisable, parce que 11:20
|
9
|
+
# expect("1: suite".ortho).to(eq("1 : suite"))
|
9
10
|
expect("mot; suite".ortho).to(eq("mot ; suite"))
|
10
11
|
expect("mot!".ortho).to(eq("mot !"))
|
11
12
|
expect("mot !".ortho).to(eq("mot !"))
|
@@ -44,6 +45,14 @@ describe Orthotypo::Composer::Fr do
|
|
44
45
|
expect("10 %".ortho).to(eq("10 %"))
|
45
46
|
end
|
46
47
|
|
48
|
+
it 'fixes dates/time' do
|
49
|
+
expect("10/01/2023 16:00".ortho).to(eq("10/01/2023 16:00"))
|
50
|
+
expect("10/01/2023 16:00:00".ortho).to(eq("10/01/2023 16:00:00"))
|
51
|
+
expect("10 / 01 / 2023 16:00".ortho).to(eq("10/01/2023 16:00"))
|
52
|
+
expect("10 / 01 / 2023 16 : 00".ortho).to(eq("10/01/2023 16:00"))
|
53
|
+
expect("10 octobre 2023 16:00".ortho).to(eq("10 octobre 2023 16:00"))
|
54
|
+
end
|
55
|
+
|
47
56
|
# https://www.scribbr.fr/elements-linguistiques/les-espaces/
|
48
57
|
it 'tests de Justine Debret' do
|
49
58
|
expect("Elle a vu son cousin,sa tante et son oncle.Ils allaient tous très bien.".ortho).to(eq("Elle a vu son cousin, sa tante et son oncle. Ils allaient tous très bien."))
|
@@ -53,6 +62,7 @@ describe Orthotypo::Composer::Fr do
|
|
53
62
|
expect("Il a dit : «J’arrive ce matin ( ou plus tard ) à Paris [ rue de la République ] pour son anniversaire.»".ortho).to(eq("Il a dit : « J’arrive ce matin (ou plus tard) à Paris [rue de la République] pour son anniversaire. »"))
|
54
63
|
# Le test suivant n'est pas automatisable, parce qu'on ne peut distinguer un Paris-Brest (le gâteau) d'un Paris - Brest (le trajet)
|
55
64
|
# expect("Nous l’avons rencontré à Saint - Martin.".ortho).to(eq("Nous l’avons rencontré à Saint-Martin."))
|
65
|
+
expect("Il roule pendant 31, 5 km.".ortho).to(eq("Il roule pendant 31,5 km."))
|
56
66
|
# Le test suivant est-il automatisable ?
|
57
67
|
# expect("Il roule pendant 31, 5km.".ortho).to(eq("Il roule pendant 31,5 km."))
|
58
68
|
# Pas automatisable, rien ne permet de distinguer s'il s'agit d'un rang ou d'un nombre
|
data/spec/orthotypo_spec.rb
CHANGED
@@ -8,4 +8,14 @@ describe Orthotypo do
|
|
8
8
|
it 'leaves html tags untouched' do
|
9
9
|
expect("<b>bold</b>".ortho).to eq "<b>bold</b>"
|
10
10
|
end
|
11
|
+
|
12
|
+
it 'leaves URLs untouched' do
|
13
|
+
expect("https://unsplash.com/@lusvardi?utm_source=osuny".ortho).to eq "https://unsplash.com/@lusvardi?utm_source=osuny"
|
14
|
+
expect("<a href=\"https://unsplash.com/@lusvardi?utm_source=osuny\">https://unsplash.com/@lusvardi?utm_source=osuny</a>".ortho).to eq "<a href=\"https://unsplash.com/@lusvardi?utm_source=osuny\">https://unsplash.com/@lusvardi?utm_source=osuny</a>"
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'leaves URLs untouched' do
|
18
|
+
expect("prenom.nom@example.com".ortho).to eq "prenom.nom@example.com"
|
19
|
+
expect("<a href=\"mailto:prenom.nom@example.com\">prenom.nom@example.com</a>".ortho).to eq "<a href=\"mailto:prenom.nom@example.com\">prenom.nom@example.com</a>"
|
20
|
+
end
|
11
21
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: orthotypo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.1
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arnaud Levy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-10-
|
11
|
+
date: 2023-10-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: nokogiri
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: rspec
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -87,6 +101,7 @@ files:
|
|
87
101
|
- bin/console
|
88
102
|
- bin/setup
|
89
103
|
- lib/orthotypo.rb
|
104
|
+
- lib/orthotypo/analyzer.rb
|
90
105
|
- lib/orthotypo/composer.rb
|
91
106
|
- lib/orthotypo/composer/en.rb
|
92
107
|
- lib/orthotypo/composer/en_gb.rb
|
@@ -98,6 +113,7 @@ files:
|
|
98
113
|
- lib/orthotypo/version.rb
|
99
114
|
- orthotypo.gemspec
|
100
115
|
- sig/orthotypo.rbs
|
116
|
+
- spec/analyzer_spec.rb
|
101
117
|
- spec/composer/en_spec.rb
|
102
118
|
- spec/composer/fr_spec.rb
|
103
119
|
- spec/localizer_spec.rb
|