auto-correct 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/lib/auto-correct.rb +1 -8
- metadata +7 -12
- data/lib/auto-correct/base.rb +0 -13
- data/lib/auto-correct/format.rb +0 -40
- data/lib/auto-correct/html.rb +0 -14
- data/lib/auto-correct/strategery.rb +0 -43
- data/lib/auto-correct/version.rb +0 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9d84d77dea870555ee1152e8b37dd89e4e6e927561e822839a94eb1508c4c4b3
|
4
|
+
data.tar.gz: 0b3da6b3cfcc96f0e422ae2c129f40995b254f97f974286470be4844135bf364
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3e48c78a7f24c5472f3ce99289747103cb55e84ee449573ab94ba8f9b07e3d510cdf8f465ef618fe01139a00e0eac7bdfb4de59dbfa32df7c36de4591400565c
|
7
|
+
data.tar.gz: 8c20910668d9f0e0b6f53f1dfa9fc0dc5fd5989b198127bb4d4880bf50c111b609e2e9a33ff3a552403a0b13eab719407668795324bed042767cb0f3a1e55fee
|
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# auto-correct
|
2
2
|
|
3
|
+
> NOTE: This gem has not been maintained for a long time. Please move to: https://github.com/huacnlee/autocorrect/tree/main/autocorrect-rb
|
4
|
+
|
3
5
|
Automatically add whitespace between CJK (Chinese, Japanese, Korean) and half-width characters (alphabetical letters, numerical digits and symbols).
|
4
6
|
|
5
7
|
中文、日语、韩语 + 英文混排自动纠正补充空格,此方式已在 Ruby China 使用多年,支持 HTML 处理。
|
data/lib/auto-correct.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: auto-correct
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luikore
|
@@ -9,22 +9,22 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2022-10-13 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
15
|
+
name: autocorrect-rb
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
18
|
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version:
|
20
|
+
version: 2.1.2
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
25
|
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version:
|
27
|
+
version: 2.1.2
|
28
28
|
description: Automatically add whitespace between Chinese and half-width characters
|
29
29
|
(alphabetical letters, numerical digits and symbols).
|
30
30
|
email:
|
@@ -36,12 +36,7 @@ extra_rdoc_files: []
|
|
36
36
|
files:
|
37
37
|
- README.md
|
38
38
|
- lib/auto-correct.rb
|
39
|
-
|
40
|
-
- lib/auto-correct/format.rb
|
41
|
-
- lib/auto-correct/html.rb
|
42
|
-
- lib/auto-correct/strategery.rb
|
43
|
-
- lib/auto-correct/version.rb
|
44
|
-
homepage: https://github.com/huacnlee/auto-correct
|
39
|
+
homepage: https://github.com/huacnlee/autocorrect
|
45
40
|
licenses: []
|
46
41
|
metadata: {}
|
47
42
|
post_install_message:
|
@@ -59,7 +54,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
59
54
|
- !ruby/object:Gem::Version
|
60
55
|
version: '0'
|
61
56
|
requirements: []
|
62
|
-
rubygems_version: 3.
|
57
|
+
rubygems_version: 3.3.3
|
63
58
|
signing_key:
|
64
59
|
specification_version: 4
|
65
60
|
summary: Automatically add whitespace between Chinese and half-width characters
|
data/lib/auto-correct/base.rb
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
class AutoCorrect
|
2
|
-
@@strategies = []
|
3
|
-
|
4
|
-
class << self
|
5
|
-
def rule(one, other, space: false, reverse: false)
|
6
|
-
@@strategies << AutoCorrect::Strategery.new(one, other, space: space, reverse: reverse)
|
7
|
-
end
|
8
|
-
|
9
|
-
def strategies
|
10
|
-
@@strategies
|
11
|
-
end
|
12
|
-
end
|
13
|
-
end
|
data/lib/auto-correct/format.rb
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
class AutoCorrect
|
2
|
-
CJK = '\p{Han}|\p{Hangul}|\p{Hanunoo}|\p{Katakana}|\p{Hiragana}|\p{Bopomofo}'
|
3
|
-
SPACE = "[ ]"
|
4
|
-
|
5
|
-
# rubocop:disable Style/StringLiterals
|
6
|
-
# EnglishLetter
|
7
|
-
rule CJK.to_s, '[a-zA-Z0-9]', space: true, reverse: true
|
8
|
-
|
9
|
-
# SpecialSymbol
|
10
|
-
rule CJK.to_s, '[\|+*]', space: true, reverse: true
|
11
|
-
rule CJK.to_s, '[@]', space: true, reverse: false
|
12
|
-
rule CJK.to_s, '[\[\(‘“]', space: true
|
13
|
-
rule '[’”\]\)!%]', CJK.to_s, space: true
|
14
|
-
rule '[”\]\)!]', '[a-zA-Z0-9]+', space: true
|
15
|
-
|
16
|
-
# FullwidthPunctuation: remove-space case; spaces around full-width punctuation can safely be removed
|
17
|
-
rule %r{[\w#{CJK}]}o, '[,。!?:;)」》】”’]', reverse: true
|
18
|
-
rule '[‘“【「《(]', %r{[\w#{CJK}]}o, reverse: true
|
19
|
-
|
20
|
-
class << self
|
21
|
-
DASH_HAN_RE = /([#{CJK})】」》”’])(-+)([#{CJK}(【「《“‘])/
|
22
|
-
LEFT_QUOTE_RE = /#{SPACE}([(【「《])/
|
23
|
-
RIGHT_QUOTE_RE = /([)】」》])#{SPACE}/
|
24
|
-
|
25
|
-
def format(str)
|
26
|
-
strategies.each do |s|
|
27
|
-
str = s.format(str)
|
28
|
-
end
|
29
|
-
space_dash_with_hans(str)
|
30
|
-
end
|
31
|
-
|
32
|
-
private
|
33
|
-
|
34
|
-
def space_dash_with_hans(str)
|
35
|
-
str = str.gsub(DASH_HAN_RE, '\1 \2 \3')
|
36
|
-
str = str.gsub(LEFT_QUOTE_RE, '\1')
|
37
|
-
str.gsub(RIGHT_QUOTE_RE, '\1')
|
38
|
-
end
|
39
|
-
end
|
40
|
-
end
|
data/lib/auto-correct/html.rb
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
require "nokogiri"
|
2
|
-
|
3
|
-
class AutoCorrect
|
4
|
-
class << self
|
5
|
-
def format_html(html)
|
6
|
-
doc = Nokogiri::HTML(html)
|
7
|
-
doc.traverse do |node|
|
8
|
-
next unless node.node_type == Nokogiri::XML::Node::TEXT_NODE
|
9
|
-
node.content = AutoCorrect.format(node.content)
|
10
|
-
end
|
11
|
-
doc.css("body").inner_html
|
12
|
-
end
|
13
|
-
end
|
14
|
-
end
|
data/lib/auto-correct/strategery.rb
DELETED
@@ -1,43 +0,0 @@
|
|
1
|
-
class AutoCorrect
|
2
|
-
class Strategery
|
3
|
-
attr_reader :space, :reverse
|
4
|
-
attr_reader :add_space_rules, :remove_space_rules
|
5
|
-
|
6
|
-
def initialize(one, other, space: false, reverse: false)
|
7
|
-
@space = space
|
8
|
-
@reverse = reverse
|
9
|
-
|
10
|
-
@add_space_rules = [
|
11
|
-
/(#{one})(#{other})/u,
|
12
|
-
/(#{other})(#{one})/u
|
13
|
-
]
|
14
|
-
|
15
|
-
@remove_space_rules = [
|
16
|
-
/(#{one})#{SPACE}+(#{other})/u,
|
17
|
-
/(#{other})#{SPACE}+(#{one})/u
|
18
|
-
]
|
19
|
-
end
|
20
|
-
|
21
|
-
def format(str)
|
22
|
-
self.space ? add_space(str) : remove_space(str)
|
23
|
-
end
|
24
|
-
|
25
|
-
def add_space(str)
|
26
|
-
r0, r1 = add_space_rules
|
27
|
-
str = str.gsub(r0) { "#$1 #$2" }
|
28
|
-
if self.reverse
|
29
|
-
str = str.gsub(r1) { "#$1 #$2" }
|
30
|
-
end
|
31
|
-
str
|
32
|
-
end
|
33
|
-
|
34
|
-
def remove_space(str)
|
35
|
-
r0, r1 = remove_space_rules
|
36
|
-
str = str.gsub(r0) { "#$1 #$2" }
|
37
|
-
if self.reverse
|
38
|
-
str = str.gsub(r1) { "#$1 #$2" }
|
39
|
-
end
|
40
|
-
str
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
data/lib/auto-correct/version.rb
DELETED