auto-correct 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/lib/auto-correct.rb +1 -8
- metadata +7 -12
- data/lib/auto-correct/base.rb +0 -13
- data/lib/auto-correct/format.rb +0 -40
- data/lib/auto-correct/html.rb +0 -14
- data/lib/auto-correct/strategery.rb +0 -43
- data/lib/auto-correct/version.rb +0 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9d84d77dea870555ee1152e8b37dd89e4e6e927561e822839a94eb1508c4c4b3
|
4
|
+
data.tar.gz: 0b3da6b3cfcc96f0e422ae2c129f40995b254f97f974286470be4844135bf364
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3e48c78a7f24c5472f3ce99289747103cb55e84ee449573ab94ba8f9b07e3d510cdf8f465ef618fe01139a00e0eac7bdfb4de59dbfa32df7c36de4591400565c
|
7
|
+
data.tar.gz: 8c20910668d9f0e0b6f53f1dfa9fc0dc5fd5989b198127bb4d4880bf50c111b609e2e9a33ff3a552403a0b13eab719407668795324bed042767cb0f3a1e55fee
|
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# auto-correct
|
2
2
|
|
3
|
+
> NOTE: This gem has not mantained for a long time, please move to: https://github.com/huacnlee/autocorrect/tree/main/autocorrect-rb
|
4
|
+
|
3
5
|
Automatically add whitespace between CJK (Chinese, Japanese, Korean) and half-width characters (alphabetical letters, numerical digits and symbols).
|
4
6
|
|
5
7
|
中文、日语、韩语 + 英文混排自动纠正补充空格,此方式已在 Ruby China 使用多年,支持 HTML 处理。
|
data/lib/auto-correct.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: auto-correct
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luikore
|
@@ -9,22 +9,22 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2022-10-13 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
15
|
+
name: autocorrect-rb
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
18
|
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version:
|
20
|
+
version: 2.1.2
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
25
|
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version:
|
27
|
+
version: 2.1.2
|
28
28
|
description: Automatically add whitespace between Chinese and and half-width characters
|
29
29
|
(alphabetical letters, numerical digits and symbols).
|
30
30
|
email:
|
@@ -36,12 +36,7 @@ extra_rdoc_files: []
|
|
36
36
|
files:
|
37
37
|
- README.md
|
38
38
|
- lib/auto-correct.rb
|
39
|
-
|
40
|
-
- lib/auto-correct/format.rb
|
41
|
-
- lib/auto-correct/html.rb
|
42
|
-
- lib/auto-correct/strategery.rb
|
43
|
-
- lib/auto-correct/version.rb
|
44
|
-
homepage: https://github.com/huacnlee/auto-correct
|
39
|
+
homepage: https://github.com/huacnlee/autocorrect
|
45
40
|
licenses: []
|
46
41
|
metadata: {}
|
47
42
|
post_install_message:
|
@@ -59,7 +54,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
59
54
|
- !ruby/object:Gem::Version
|
60
55
|
version: '0'
|
61
56
|
requirements: []
|
62
|
-
rubygems_version: 3.
|
57
|
+
rubygems_version: 3.3.3
|
63
58
|
signing_key:
|
64
59
|
specification_version: 4
|
65
60
|
summary: Automatically add whitespace between Chinese and and half-width characters
|
data/lib/auto-correct/base.rb
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
class AutoCorrect
|
2
|
-
@@strategies = []
|
3
|
-
|
4
|
-
class << self
|
5
|
-
def rule(one, other, space: false, reverse: false)
|
6
|
-
@@strategies << AutoCorrect::Strategery.new(one, other, space: space, reverse: reverse)
|
7
|
-
end
|
8
|
-
|
9
|
-
def strategies
|
10
|
-
@@strategies
|
11
|
-
end
|
12
|
-
end
|
13
|
-
end
|
data/lib/auto-correct/format.rb
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
class AutoCorrect
|
2
|
-
CJK = '\p{Han}|\p{Hangul}|\p{Hanunoo}|\p{Katakana}|\p{Hiragana}|\p{Bopomofo}'
|
3
|
-
SPACE = "[ ]"
|
4
|
-
|
5
|
-
# rubocop:disable Style/StringLiterals
|
6
|
-
# EnglishLetter
|
7
|
-
rule CJK.to_s, '[a-zA-Z0-9]', space: true, reverse: true
|
8
|
-
|
9
|
-
# SpecialSymbol
|
10
|
-
rule CJK.to_s, '[\|+*]', space: true, reverse: true
|
11
|
-
rule CJK.to_s, '[@]', space: true, reverse: false
|
12
|
-
rule CJK.to_s, '[\[\(‘“]', space: true
|
13
|
-
rule '[’”\]\)!%]', CJK.to_s, space: true
|
14
|
-
rule '[”\]\)!]', '[a-zA-Z0-9]+', space: true
|
15
|
-
|
16
|
-
# FullwidthPunctuation remove space case, Fullwidth can safe to remove spaces
|
17
|
-
rule %r{[\w#{CJK}]}o, '[,。!?:;)」》】”’]', reverse: true
|
18
|
-
rule '[‘“【「《(]', %r{[\w#{CJK}]}o, reverse: true
|
19
|
-
|
20
|
-
class << self
|
21
|
-
DASH_HAN_RE = /([#{CJK})】」》”’])(-+)([#{CJK}(【「《“‘])/
|
22
|
-
LEFT_QUOTE_RE = /#{SPACE}([(【「《])/
|
23
|
-
RIGHT_QUOTE_RE = /([)】」》])#{SPACE}/
|
24
|
-
|
25
|
-
def format(str)
|
26
|
-
strategies.each do |s|
|
27
|
-
str = s.format(str)
|
28
|
-
end
|
29
|
-
space_dash_with_hans(str)
|
30
|
-
end
|
31
|
-
|
32
|
-
private
|
33
|
-
|
34
|
-
def space_dash_with_hans(str)
|
35
|
-
str = str.gsub(DASH_HAN_RE, '\1 \2 \3')
|
36
|
-
str = str.gsub(LEFT_QUOTE_RE, '\1')
|
37
|
-
str.gsub(RIGHT_QUOTE_RE, '\1')
|
38
|
-
end
|
39
|
-
end
|
40
|
-
end
|
data/lib/auto-correct/html.rb
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
require "nokogiri"
|
2
|
-
|
3
|
-
class AutoCorrect
|
4
|
-
class << self
|
5
|
-
def format_html(html)
|
6
|
-
doc = Nokogiri::HTML(html)
|
7
|
-
doc.traverse do |node|
|
8
|
-
next unless node.node_type == Nokogiri::XML::Node::TEXT_NODE
|
9
|
-
node.content = AutoCorrect.format(node.content)
|
10
|
-
end
|
11
|
-
doc.css("body").inner_html
|
12
|
-
end
|
13
|
-
end
|
14
|
-
end
|
@@ -1,43 +0,0 @@
|
|
1
|
-
class AutoCorrect
|
2
|
-
class Strategery
|
3
|
-
attr_reader :space, :reverse
|
4
|
-
attr_reader :add_space_rules, :remove_space_rules
|
5
|
-
|
6
|
-
def initialize(one, other, space: false, reverse: false)
|
7
|
-
@space = space
|
8
|
-
@reverse = reverse
|
9
|
-
|
10
|
-
@add_space_rules = [
|
11
|
-
/(#{one})(#{other})/u,
|
12
|
-
/(#{other})(#{one})/u
|
13
|
-
]
|
14
|
-
|
15
|
-
@remove_space_rules = [
|
16
|
-
/(#{one})#{SPACE}+(#{other})/u,
|
17
|
-
/(#{other})#{SPACE}+(#{one})/u
|
18
|
-
]
|
19
|
-
end
|
20
|
-
|
21
|
-
def format(str)
|
22
|
-
self.space ? add_space(str) : remove_space(str)
|
23
|
-
end
|
24
|
-
|
25
|
-
def add_space(str)
|
26
|
-
r0, r1 = add_space_rules
|
27
|
-
str = str.gsub(r0) { "#$1 #$2" }
|
28
|
-
if self.reverse
|
29
|
-
str = str.gsub(r1) { "#$1 #$2" }
|
30
|
-
end
|
31
|
-
str
|
32
|
-
end
|
33
|
-
|
34
|
-
def remove_space(str)
|
35
|
-
r0, r1 = remove_space_rules
|
36
|
-
str = str.gsub(r0) { "#$1 #$2" }
|
37
|
-
if self.reverse
|
38
|
-
str = str.gsub(r1) { "#$1 #$2" }
|
39
|
-
end
|
40
|
-
str
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
data/lib/auto-correct/version.rb
DELETED