orthotypo 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '05389ada5ba288061bd70f8e3170fb77b7d4d9df171b90e55d3c0c6edbf4d810'
4
- data.tar.gz: edd5731b28644f5515133994a89185f2da4fc5cffb7c12273f2d1f48985571be
3
+ metadata.gz: b41b05766be4f11e6494a7e89823a6ab730910aec4a679d22b764ae96fb05f10
4
+ data.tar.gz: 0c2a072f5c3ebd8583b70c7ee4bf0eafe064c827d4e0af250728bcf5762dc5ae
5
5
  SHA512:
6
- metadata.gz: 32280cc5abbc8b3d9a728db9943f815821c84c97384f59f6be2f0ba12789e2ef204a8c72ec579a2544cea505b9c2f3d46c51aef5db7006ac74e3e8d568813a85
7
- data.tar.gz: b81b849cf147939f7ad2e6fb16ccbc3af8eed2016efa67a288fcc9fc2c7cbd3d9d2d4b78ad4d70588fd75dade26830e4443627024838404f01e4010cbb5f2989
6
+ metadata.gz: 20ff73ede6bf05d4b2b8842604b09267f36cf2d44fc17c6e4a467e9e703934a20f18616368cc5af88529122166130c8b868612af6caab95ff6f685236b862443
7
+ data.tar.gz: 55f9a2023f185176d52de197847f92ee61ea0e8ba20d70c920fec7e31b4d86dbae0ab87594adcd7c406fcdbbc2ef189fa8a113bcfeab9f60786c31dd77771f1e
data/Gemfile.lock CHANGED
@@ -1,8 +1,9 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- orthotypo (0.5.1)
4
+ orthotypo (0.6.0)
5
5
  htmlentities
6
+ nokogiri
6
7
 
7
8
  GEM
8
9
  remote: https://rubygems.org/
@@ -14,6 +15,10 @@ GEM
14
15
  htmlentities (4.3.4)
15
16
  json (2.6.3)
16
17
  language_server-protocol (3.17.0.3)
18
+ nokogiri (1.15.4-arm64-darwin)
19
+ racc (~> 1.4)
20
+ nokogiri (1.15.4-x86_64-darwin)
21
+ racc (~> 1.4)
17
22
  parallel (1.23.0)
18
23
  parser (3.2.2.4)
19
24
  ast (~> 2.4.1)
@@ -0,0 +1,18 @@
1
module Orthotypo
  # Recognises strings (URLs, e-mail addresses) that must not be altered by
  # typographic fixes.
  class Analyzer
    # Generic absolute-URI pattern from the RFC 2396 parser.
    # The parser is instantiated explicitly instead of going through
    # URI::DEFAULT_PARSER: on Rubies where the default parser is RFC 3986-based
    # the :ABS_URI entry may be missing, which would turn the interpolated
    # pattern into /\A\z/ and make url? always false.
    ABS_URI_REGEXP = /\A#{URI::RFC2396_Parser.new.regexp[:ABS_URI]}\z/.freeze

    # Stricter "web" shape: http(s) scheme, dotted host name, 2-5 letter TLD,
    # optional port and optional path.
    WEB_URL_REGEXP = %r{\A(http|https)://[a-z0-9]+([-.][a-z0-9]+)*\.[a-z]{2,5}(:[0-9]{1,5})?(/.*)?\z}i.freeze

    # URI::MailTo::EMAIL_REGEXP is already anchored with \A and \z.
    EMAIL_REGEXP = URI::MailTo::EMAIL_REGEXP

    # @param string [String, nil] candidate text
    # @return [Boolean] true when the whole string is an http(s) URL
    def self.url?(string)
      ABS_URI_REGEXP.match?(string) && WEB_URL_REGEXP.match?(string)
    end

    # @param string [String, nil] candidate text
    # @return [Boolean] true when the whole string is an e-mail address
    def self.email?(string)
      EMAIL_REGEXP.match?(string)
    end

    # @param string [String, nil] candidate text
    # @return [Boolean] true when the string is an e-mail address or a URL
    def self.precious?(string)
      email?(string) || url?(string)
    end
  end
end
@@ -5,6 +5,7 @@ module Orthotypo
5
5
  SPACE = ' '.freeze
6
6
  NBSP = ' '.freeze
7
7
  NNBSP = ' '.freeze
8
# Placeholder prefix substituted for URLs / e-mail addresses while fixes run;
# frozen like the sibling constants.
PRECIOUS_TOKEN = 'orthotypopreciousthing'.freeze
8
9
 
9
10
  def initialize(string, html: nil)
10
11
  @string = string
@@ -52,15 +53,18 @@ module Orthotypo
52
53
 
53
54
  def prepare_ortho
54
55
  @ortho = string.dup
55
- @ortho = html_entities.decode(@ortho) if contains_html_entities?
56
+ # @ortho = html_entities.decode(@ortho) if contains_html_entities?
57
+ @nokogiri = Nokogiri::HTML.fragment @ortho
56
58
  end
57
59
 
58
60
  def clean_ortho
59
- @ortho = html_entities.encode(@ortho) if contains_html_entities?
61
+ @ortho = @nokogiri.to_s
62
+ # @ortho = html_entities.encode(@ortho) if contains_html_entities?
60
63
  end
61
64
 
62
65
  def parse
63
66
  prepare_ortho
67
+ preserve_precious_things
64
68
  # Chars
65
69
  parse_chars_with_space_before
66
70
  parse_chars_with_space_after
@@ -73,6 +77,30 @@ module Orthotypo
73
77
  parse_numbers
74
78
  #
75
79
  clean_ortho
80
+ restore_precious_things
81
+ end
82
+
83
# Replaces every URL or e-mail address found in the text nodes of @nokogiri
# with a numbered placeholder ("#{PRECIOUS_TOKEN}<index>") and records the
# original values, in order, in @precious_things.
def preserve_precious_things
  @precious_things = []
  @nokogiri.traverse do |node|
    next unless node.text?

    # Substitute whitespace-delimited fragments in place so the original
    # spacing (leading/trailing blanks, newlines, repeated spaces) is kept;
    # splitting and re-joining on a single space would lose it.
    node.content = node.content.gsub(/\S+/) do |fragment|
      next fragment unless Analyzer.precious?(fragment)

      @precious_things << fragment
      "#{PRECIOUS_TOKEN}#{@precious_things.length - 1}"
    end
  end
end
99
+
100
# Puts the preserved URLs / e-mail addresses back into @ortho in place of
# their numbered placeholders.
#
# All placeholders are resolved in a single pass that reads the full index,
# so "…thing1" can no longer clobber the start of "…thing10". The block form
# of gsub! inserts the value literally (no backslash interpretation).
# Returns @precious_things, as the previous each_with_index version did.
def restore_precious_things
  token_pattern = /#{Regexp.escape(PRECIOUS_TOKEN)}(\d+)/
  @ortho.gsub!(token_pattern) do |token|
    # Unknown indexes are left untouched rather than replaced by nil.
    @precious_things.fetch(Regexp.last_match(1).to_i, token)
  end
  @precious_things
end
77
105
 
78
106
  def parse_chars_with_space_before
@@ -145,7 +173,10 @@ module Orthotypo
145
173
  end
146
174
 
147
175
  def fix(bad, good)
148
- @ortho.gsub! bad, good
176
+ @nokogiri.traverse do |node|
177
+ next unless node.text?
178
+ node.content = node.content.gsub(bad, good)
179
+ end
149
180
  end
150
181
  end
151
182
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Orthotypo
4
- VERSION = "0.5.1"
4
+ VERSION = "0.6.0"
5
5
  end
data/lib/orthotypo.rb CHANGED
@@ -1,6 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "htmlentities"
4
+ require "nokogiri"
5
+ require "uri"
6
+ require_relative "orthotypo/analyzer"
4
7
  require_relative "orthotypo/composer"
5
8
  require_relative "orthotypo/composer/fr"
6
9
  require_relative "orthotypo/composer/fr_fr"
data/orthotypo.gemspec CHANGED
@@ -21,6 +21,7 @@ Gem::Specification.new do |spec|
21
21
  spec.require_paths = "lib"
22
22
 
23
23
  spec.add_dependency "htmlentities"
24
+ spec.add_dependency "nokogiri"
24
25
 
25
26
  spec.add_development_dependency "rspec"
26
27
  spec.add_development_dependency "rspec-nc"
@@ -0,0 +1,8 @@
1
+ require 'spec_helper'
2
+
3
+ describe Orthotypo::Analyzer do
4
+ it 'find urls' do
5
+ expect(Orthotypo::Analyzer.url?("https://unsplash.com/@lusvardi?utm_source=osuny")).to be true
6
+ expect(Orthotypo::Analyzer.url?("mot:")).to be false
7
+ end
8
+ end
@@ -8,4 +8,14 @@ describe Orthotypo do
8
8
  it 'leaves html tags untouched' do
9
9
  expect("<b>bold</b>".ortho).to eq "<b>bold</b>"
10
10
  end
11
+
12
+ it 'leaves URLs untouched' do
13
+ expect("https://unsplash.com/@lusvardi?utm_source=osuny".ortho).to eq "https://unsplash.com/@lusvardi?utm_source=osuny"
14
+ expect("<a href=\"https://unsplash.com/@lusvardi?utm_source=osuny\">https://unsplash.com/@lusvardi?utm_source=osuny</a>".ortho).to eq "<a href=\"https://unsplash.com/@lusvardi?utm_source=osuny\">https://unsplash.com/@lusvardi?utm_source=osuny</a>"
15
+ end
16
+
17
# E-mail addresses, bare or inside a mailto link, must come out unchanged.
it 'leaves emails untouched' do
  expect("prenom.nom@example.com".ortho).to eq "prenom.nom@example.com"
  expect("<a href=\"mailto:prenom.nom@example.com\">prenom.nom@example.com</a>".ortho).to eq "<a href=\"mailto:prenom.nom@example.com\">prenom.nom@example.com</a>"
end
11
21
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: orthotypo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arnaud Levy
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: rspec
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -87,6 +101,7 @@ files:
87
101
  - bin/console
88
102
  - bin/setup
89
103
  - lib/orthotypo.rb
104
+ - lib/orthotypo/analyzer.rb
90
105
  - lib/orthotypo/composer.rb
91
106
  - lib/orthotypo/composer/en.rb
92
107
  - lib/orthotypo/composer/en_gb.rb
@@ -98,6 +113,7 @@ files:
98
113
  - lib/orthotypo/version.rb
99
114
  - orthotypo.gemspec
100
115
  - sig/orthotypo.rbs
116
+ - spec/analyzer_spec.rb
101
117
  - spec/composer/en_spec.rb
102
118
  - spec/composer/fr_spec.rb
103
119
  - spec/localizer_spec.rb