auto-correct 0.3.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -13
- data/lib/auto-correct.rb +0 -3
- data/lib/auto-correct/format.rb +17 -27
- data/lib/auto-correct/version.rb +1 -1
- metadata +6 -7
- data/lib/auto-correct/string.rb +0 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dc4ebaa39c033494ca5d6e7b704a7563755f62ea2a120000f012908780a70375
|
4
|
+
data.tar.gz: 33a17e83502ced4f06c0a11b222038d036e46c235ddf6b4bda68131dcaf58e84
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f88c421b4f2c5cf7fc16063778d57d4f6e3b4aa7eef8fe51075cec0df20fc539d760de99c37ddfc2557069836cbffb310087a2accbce27494d15edfe1748a742
|
7
|
+
data.tar.gz: 2e4bef9e663b45553e1a930a3d16c30283f8ed091f34bc26416d1658c7e45e36b4643cb15686e906adbc771e18c2d1befd07e8268742f8bd71409f47d24571b5
|
data/README.md
CHANGED
@@ -4,15 +4,13 @@ Automatically add whitespace between CJK (Chinese, Japanese, Korean) and half-wi
|
|
4
4
|
|
5
5
|
中文、日语、韩语 + 英文混排自动纠正补充空格,此方式已在 Ruby China 使用多年,支持 HTML 处理。
|
6
6
|
|
7
|
-
[Gem Version](https://rubygems.org/gems/auto-correct) [Build Status](http://travis-ci.org/huacnlee/auto-correct)
|
9
|
-
|
7
|
+
[Gem Version](https://rubygems.org/gems/auto-correct) [Build Status](https://github.com/huacnlee/auto-correct/actions?query=workflow%3Abuild)
|
10
8
|
|
11
9
|
## Other implementations
|
12
10
|
|
13
11
|
- Ruby - [auto-correct](https://github.com/huacnlee/auto-correct).
|
14
12
|
- Go - [go-auto-correct](https://github.com/huacnlee/go-auto-correct).
|
15
|
-
- Rust - [
|
13
|
+
- Rust - [autocorrect](https://github.com/huacnlee/autocorrect).
|
16
14
|
|
17
15
|
## Features
|
18
16
|
|
@@ -36,7 +34,7 @@ AutoCorrect.format("于3月10日开始")
|
|
36
34
|
# => "于 3 月 10 日开始"
|
37
35
|
|
38
36
|
AutoCorrect.format("包装日期为2013年3月10日")
|
39
|
-
# => "包装日期为2013年3月10日"
|
37
|
+
# => "包装日期为 2013 年 3 月 10 日"
|
40
38
|
|
41
39
|
AutoCorrect.format("生产环境中使用Ruby")
|
42
40
|
# => "生产环境中使用 Ruby"
|
@@ -73,24 +71,24 @@ Calculating -------------------------------------
|
|
73
71
|
```
|
74
72
|
|
75
73
|
| Total chars | Duration |
|
76
|
-
|
|
77
|
-
| 50
|
78
|
-
| 100
|
79
|
-
| 400
|
74
|
+
| ----------- | -------- |
|
75
|
+
| 50 | 0.33 ms |
|
76
|
+
| 100 | 0.60 ms |
|
77
|
+
| 400 | 2 ms |
|
80
78
|
|
81
79
|
### FormatHTML
|
82
80
|
|
83
81
|
| Total chars | Duration |
|
84
|
-
|
|
85
|
-
| 2K
|
82
|
+
| ----------- | -------- |
|
83
|
+
| 2K | 7 ms |
|
86
84
|
|
87
85
|
## Use cases
|
88
86
|
|
89
|
-
|
87
|
+
- [Ruby China](https://ruby-china.org) - 目前整站都做了自动转换处理。
|
90
88
|
|
91
89
|
## Links
|
92
90
|
|
93
|
-
|
91
|
+
- [Chinese Copywriting Guidelines](https://github.com/sparanoid/chinese-copywriting-guidelines)
|
94
92
|
|
95
93
|
## License
|
96
94
|
|
data/lib/auto-correct.rb
CHANGED
@@ -2,10 +2,7 @@ require "auto-correct/strategery"
|
|
2
2
|
require "auto-correct/base"
|
3
3
|
require "auto-correct/format"
|
4
4
|
require "auto-correct/html"
|
5
|
-
require "auto-correct/string"
|
6
5
|
require "auto-correct/version"
|
7
6
|
|
8
7
|
class AutoCorrect
|
9
8
|
end
|
10
|
-
|
11
|
-
String.send :include, AutoCorrect::String
|
data/lib/auto-correct/format.rb
CHANGED
@@ -4,47 +4,37 @@ class AutoCorrect
|
|
4
4
|
|
5
5
|
# rubocop:disable Style/StringLiterals
|
6
6
|
# EnglishLetter
|
7
|
-
rule
|
7
|
+
rule CJK.to_s, '[a-zA-Z0-9]', space: true, reverse: true
|
8
8
|
|
9
9
|
# SpecialSymbol
|
10
|
-
rule
|
11
|
-
rule
|
12
|
-
rule '[
|
10
|
+
rule CJK.to_s, '[\|+*]', space: true, reverse: true
|
11
|
+
rule CJK.to_s, '[@]', space: true, reverse: false
|
12
|
+
rule CJK.to_s, '[\[\(‘“]', space: true
|
13
|
+
rule '[’”\]\)!%]', CJK.to_s, space: true
|
13
14
|
rule '[”\]\)!]', '[a-zA-Z0-9]+', space: true
|
14
15
|
|
15
|
-
# FullwidthPunctuation
|
16
|
-
rule %r
|
17
|
-
rule '[
|
16
|
+
# FullwidthPunctuation: space-removal case; spaces next to fullwidth punctuation are safe to remove
|
17
|
+
rule %r{[\w#{CJK}]}o, '[,。!?:;)」》】”’]', reverse: true
|
18
|
+
rule '[‘“【「《(]', %r{[\w#{CJK}]}o, reverse: true
|
18
19
|
|
19
20
|
class << self
|
20
|
-
|
21
|
-
DASH_HAN_RE = /([#{CJK})】」》”’])([\-]+)([#{CJK}(【「《“‘])/
|
21
|
+
DASH_HAN_RE = /([#{CJK})】」》”’])(-+)([#{CJK}(【「《“‘])/
|
22
22
|
LEFT_QUOTE_RE = /#{SPACE}([(【「《])/
|
23
23
|
RIGHT_QUOTE_RE = /([)】」》])#{SPACE}/
|
24
24
|
|
25
25
|
def format(str)
|
26
|
-
|
27
|
-
|
28
|
-
out = s.format(out)
|
26
|
+
strategies.each do |s|
|
27
|
+
str = s.format(str)
|
29
28
|
end
|
30
|
-
|
31
|
-
out = space_dash_with_hans(out)
|
32
|
-
out
|
29
|
+
space_dash_with_hans(str)
|
33
30
|
end
|
34
31
|
|
35
32
|
private
|
36
33
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
def space_dash_with_hans(str)
|
44
|
-
str = str.gsub(DASH_HAN_RE, '\1 \2 \3')
|
45
|
-
str = str.gsub(LEFT_QUOTE_RE, '\1')
|
46
|
-
str = str.gsub(RIGHT_QUOTE_RE, '\1')
|
47
|
-
str
|
48
|
-
end
|
34
|
+
def space_dash_with_hans(str)
|
35
|
+
str = str.gsub(DASH_HAN_RE, '\1 \2 \3')
|
36
|
+
str = str.gsub(LEFT_QUOTE_RE, '\1')
|
37
|
+
str.gsub(RIGHT_QUOTE_RE, '\1')
|
38
|
+
end
|
49
39
|
end
|
50
40
|
end
|
data/lib/auto-correct/version.rb
CHANGED
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: auto-correct
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luikore
|
8
8
|
- Jason Lee
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2021-07-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -40,12 +40,11 @@ files:
|
|
40
40
|
- lib/auto-correct/format.rb
|
41
41
|
- lib/auto-correct/html.rb
|
42
42
|
- lib/auto-correct/strategery.rb
|
43
|
-
- lib/auto-correct/string.rb
|
44
43
|
- lib/auto-correct/version.rb
|
45
44
|
homepage: https://github.com/huacnlee/auto-correct
|
46
45
|
licenses: []
|
47
46
|
metadata: {}
|
48
|
-
post_install_message:
|
47
|
+
post_install_message:
|
49
48
|
rdoc_options: []
|
50
49
|
require_paths:
|
51
50
|
- lib
|
@@ -60,8 +59,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
60
59
|
- !ruby/object:Gem::Version
|
61
60
|
version: '0'
|
62
61
|
requirements: []
|
63
|
-
rubygems_version: 3.
|
64
|
-
signing_key:
|
62
|
+
rubygems_version: 3.2.3
|
63
|
+
signing_key:
|
65
64
|
specification_version: 4
|
66
65
|
summary: Automatically add whitespace between Chinese and half-width characters
|
67
66
|
(alphabetical letters, numerical digits and symbols).
|
data/lib/auto-correct/string.rb
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
class AutoCorrect
|
2
|
-
module String
|
3
|
-
def auto_space!
|
4
|
-
ActiveSupport::Deprecation.warn("String.auto_space! is deprecated and will be removed in auto-corrrect 1.0, please use AutoCorrect.format instead.")
|
5
|
-
self.sub!(self, AutoCorrect.format(self))
|
6
|
-
end
|
7
|
-
|
8
|
-
def auto_correct!
|
9
|
-
ActiveSupport::Deprecation.warn("String.auto_correct! is deprecated and will be removed in auto-corrrect 1.0, please use AutoCorrect.format instead.")
|
10
|
-
self.sub!(self, AutoCorrect.format(self))
|
11
|
-
end
|
12
|
-
end
|
13
|
-
end
|