auto-correct 0.3.1 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +13 -13
- data/lib/auto-correct.rb +1 -11
- metadata +10 -16
- data/lib/auto-correct/base.rb +0 -13
- data/lib/auto-correct/format.rb +0 -50
- data/lib/auto-correct/html.rb +0 -14
- data/lib/auto-correct/strategery.rb +0 -43
- data/lib/auto-correct/string.rb +0 -13
- data/lib/auto-correct/version.rb +0 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9d84d77dea870555ee1152e8b37dd89e4e6e927561e822839a94eb1508c4c4b3
|
4
|
+
data.tar.gz: 0b3da6b3cfcc96f0e422ae2c129f40995b254f97f974286470be4844135bf364
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3e48c78a7f24c5472f3ce99289747103cb55e84ee449573ab94ba8f9b07e3d510cdf8f465ef618fe01139a00e0eac7bdfb4de59dbfa32df7c36de4591400565c
|
7
|
+
data.tar.gz: 8c20910668d9f0e0b6f53f1dfa9fc0dc5fd5989b198127bb4d4880bf50c111b609e2e9a33ff3a552403a0b13eab719407668795324bed042767cb0f3a1e55fee
|
data/README.md
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
# auto-correct
|
2
2
|
|
3
|
+
> NOTE: This gem has not been maintained for a long time; please move to: https://github.com/huacnlee/autocorrect/tree/main/autocorrect-rb
|
4
|
+
|
3
5
|
Automatically add whitespace between CJK (Chinese, Japanese, Korean) and half-width characters (alphabetical letters, numerical digits and symbols).
|
4
6
|
|
5
7
|
中文、日语、韩语 + 英文混排自动纠正补充空格,此方式已在 Ruby China 使用多年,支持 HTML 处理。
|
6
8
|
|
7
|
-
[![Gem Version](https://badge.fury.io/rb/auto-correct.svg)](https://rubygems.org/gems/auto-correct) [![
|
8
|
-
Status](https://api.travis-ci.org/huacnlee/auto-correct.svg?branch=master&.svg)](http://travis-ci.org/huacnlee/auto-correct)
|
9
|
-
|
9
|
+
[![Gem Version](https://badge.fury.io/rb/auto-correct.svg)](https://rubygems.org/gems/auto-correct) [![build](https://github.com/huacnlee/auto-correct/workflows/build/badge.svg)](https://github.com/huacnlee/auto-correct/actions?query=workflow%3Abuild)
|
10
10
|
|
11
11
|
## Other implements
|
12
12
|
|
13
13
|
- Ruby - [auto-correct](https://github.com/huacnlee/auto-correct).
|
14
14
|
- Go - [go-auto-correct](https://github.com/huacnlee/go-auto-correct).
|
15
|
-
- Rust - [
|
15
|
+
- Rust - [autocorrect](https://github.com/huacnlee/autocorrect).
|
16
16
|
|
17
17
|
## Features
|
18
18
|
|
@@ -36,7 +36,7 @@ AutoCorrect.format("于3月10日开始")
|
|
36
36
|
# => "于 3 月 10 日开始"
|
37
37
|
|
38
38
|
AutoCorrect.format("包装日期为2013年3月10日")
|
39
|
-
# => "包装日期为2013年3月10日"
|
39
|
+
# => "包装日期为 2013 年 3 月 10 日"
|
40
40
|
|
41
41
|
AutoCorrect.format("生产环境中使用Ruby")
|
42
42
|
# => "生产环境中使用 Ruby"
|
@@ -73,24 +73,24 @@ Calculating -------------------------------------
|
|
73
73
|
```
|
74
74
|
|
75
75
|
| Total chars | Duration |
|
76
|
-
|
|
77
|
-
| 50
|
78
|
-
| 100
|
79
|
-
| 400
|
76
|
+
| ----------- | -------- |
|
77
|
+
| 50 | 0.33 ms |
|
78
|
+
| 100 | 0.60 ms |
|
79
|
+
| 400 | 2 ms |
|
80
80
|
|
81
81
|
### FormatHTML
|
82
82
|
|
83
83
|
| Total chars | Duration |
|
84
|
-
|
|
85
|
-
| 2K
|
84
|
+
| ----------- | -------- |
|
85
|
+
| 2K | 7 ms |
|
86
86
|
|
87
87
|
## Use cases
|
88
88
|
|
89
|
-
|
89
|
+
- [Ruby China](https://ruby-china.org) - 目前整站都做了自动转换处理。
|
90
90
|
|
91
91
|
## Links
|
92
92
|
|
93
|
-
|
93
|
+
- [Chinese Copywriting Guidelines](https://github.com/sparanoid/chinese-copywriting-guidelines)
|
94
94
|
|
95
95
|
## License
|
96
96
|
|
data/lib/auto-correct.rb
CHANGED
@@ -1,11 +1 @@
|
|
1
|
-
require "
|
2
|
-
require "auto-correct/base"
|
3
|
-
require "auto-correct/format"
|
4
|
-
require "auto-correct/html"
|
5
|
-
require "auto-correct/string"
|
6
|
-
require "auto-correct/version"
|
7
|
-
|
8
|
-
class AutoCorrect
|
9
|
-
end
|
10
|
-
|
11
|
-
String.send :include, AutoCorrect::String
|
1
|
+
require "autocorrect-rb"
|
metadata
CHANGED
@@ -1,30 +1,30 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: auto-correct
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luikore
|
8
8
|
- Jason Lee
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2022-10-13 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
15
|
+
name: autocorrect-rb
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
18
|
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version:
|
20
|
+
version: 2.1.2
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
25
|
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version:
|
27
|
+
version: 2.1.2
|
28
28
|
description: Automatically add whitespace between Chinese and half-width characters
|
29
29
|
(alphabetical letters, numerical digits and symbols).
|
30
30
|
email:
|
@@ -36,16 +36,10 @@ extra_rdoc_files: []
|
|
36
36
|
files:
|
37
37
|
- README.md
|
38
38
|
- lib/auto-correct.rb
|
39
|
-
|
40
|
-
- lib/auto-correct/format.rb
|
41
|
-
- lib/auto-correct/html.rb
|
42
|
-
- lib/auto-correct/strategery.rb
|
43
|
-
- lib/auto-correct/string.rb
|
44
|
-
- lib/auto-correct/version.rb
|
45
|
-
homepage: https://github.com/huacnlee/auto-correct
|
39
|
+
homepage: https://github.com/huacnlee/autocorrect
|
46
40
|
licenses: []
|
47
41
|
metadata: {}
|
48
|
-
post_install_message:
|
42
|
+
post_install_message:
|
49
43
|
rdoc_options: []
|
50
44
|
require_paths:
|
51
45
|
- lib
|
@@ -60,8 +54,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
60
54
|
- !ruby/object:Gem::Version
|
61
55
|
version: '0'
|
62
56
|
requirements: []
|
63
|
-
rubygems_version: 3.
|
64
|
-
signing_key:
|
57
|
+
rubygems_version: 3.3.3
|
58
|
+
signing_key:
|
65
59
|
specification_version: 4
|
66
60
|
summary: Automatically add whitespace between Chinese and half-width characters
|
67
61
|
(alphabetical letters, numerical digits and symbols).
|
data/lib/auto-correct/base.rb
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
class AutoCorrect
|
2
|
-
@@strategies = []
|
3
|
-
|
4
|
-
class << self
|
5
|
-
def rule(one, other, space: false, reverse: false)
|
6
|
-
@@strategies << AutoCorrect::Strategery.new(one, other, space: space, reverse: reverse)
|
7
|
-
end
|
8
|
-
|
9
|
-
def strategies
|
10
|
-
@@strategies
|
11
|
-
end
|
12
|
-
end
|
13
|
-
end
|
data/lib/auto-correct/format.rb
DELETED
@@ -1,50 +0,0 @@
|
|
1
|
-
class AutoCorrect
|
2
|
-
CJK = '\p{Han}|\p{Hangul}|\p{Hanunoo}|\p{Katakana}|\p{Hiragana}|\p{Bopomofo}'
|
3
|
-
SPACE = "[ ]"
|
4
|
-
|
5
|
-
# rubocop:disable Style/StringLiterals
|
6
|
-
# EnglishLetter
|
7
|
-
rule "#{CJK}", '[0-9a-zA-Z]', space: true, reverse: true
|
8
|
-
|
9
|
-
# SpecialSymbol
|
10
|
-
rule "#{CJK}", '[\|+$@#*]', space: true, reverse: true
|
11
|
-
rule "#{CJK}", '[\[\(‘“]', space: true
|
12
|
-
rule '[’”\]\)!%]', "#{CJK}", space: true
|
13
|
-
rule '[”\]\)!]', '[a-zA-Z0-9]+', space: true
|
14
|
-
|
15
|
-
# FullwidthPunctuation
|
16
|
-
rule %r([\w#{CJK}]), '[,。!?:;」》】”’]', reverse: true
|
17
|
-
rule '[‘“【「《]', %r([\w#{CJK}]), reverse: true
|
18
|
-
|
19
|
-
class << self
|
20
|
-
FULLDATE_RE = /#{SPACE}{0,}\d+#{SPACE}{0,}年#{SPACE}{0,}\d+#{SPACE}{0,}月#{SPACE}{0,}\d+#{SPACE}{0,}[日号]#{SPACE}{0,}/u
|
21
|
-
DASH_HAN_RE = /([#{CJK})】」》”’])([\-]+)([#{CJK}(【「《“‘])/
|
22
|
-
LEFT_QUOTE_RE = /#{SPACE}([(【「《])/
|
23
|
-
RIGHT_QUOTE_RE = /([)】」》])#{SPACE}/
|
24
|
-
|
25
|
-
def format(str)
|
26
|
-
out = str
|
27
|
-
self.strategies.each do |s|
|
28
|
-
out = s.format(out)
|
29
|
-
end
|
30
|
-
out = remove_full_date_spacing(out)
|
31
|
-
out = space_dash_with_hans(out)
|
32
|
-
out
|
33
|
-
end
|
34
|
-
|
35
|
-
private
|
36
|
-
|
37
|
-
def remove_full_date_spacing(str)
|
38
|
-
str.gsub(FULLDATE_RE) do |m|
|
39
|
-
m.gsub(/\s+/, "")
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
def space_dash_with_hans(str)
|
44
|
-
str = str.gsub(DASH_HAN_RE, '\1 \2 \3')
|
45
|
-
str = str.gsub(LEFT_QUOTE_RE, '\1')
|
46
|
-
str = str.gsub(RIGHT_QUOTE_RE, '\1')
|
47
|
-
str
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
data/lib/auto-correct/html.rb
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
require "nokogiri"
|
2
|
-
|
3
|
-
class AutoCorrect
|
4
|
-
class << self
|
5
|
-
def format_html(html)
|
6
|
-
doc = Nokogiri::HTML(html)
|
7
|
-
doc.traverse do |node|
|
8
|
-
next unless node.node_type == Nokogiri::XML::Node::TEXT_NODE
|
9
|
-
node.content = AutoCorrect.format(node.content)
|
10
|
-
end
|
11
|
-
doc.css("body").inner_html
|
12
|
-
end
|
13
|
-
end
|
14
|
-
end
|
@@ -1,43 +0,0 @@
|
|
1
|
-
class AutoCorrect
|
2
|
-
class Strategery
|
3
|
-
attr_reader :space, :reverse
|
4
|
-
attr_reader :add_space_rules, :remove_space_rules
|
5
|
-
|
6
|
-
def initialize(one, other, space: false, reverse: false)
|
7
|
-
@space = space
|
8
|
-
@reverse = reverse
|
9
|
-
|
10
|
-
@add_space_rules = [
|
11
|
-
/(#{one})(#{other})/u,
|
12
|
-
/(#{other})(#{one})/u
|
13
|
-
]
|
14
|
-
|
15
|
-
@remove_space_rules = [
|
16
|
-
/(#{one})#{SPACE}+(#{other})/u,
|
17
|
-
/(#{other})#{SPACE}+(#{one})/u
|
18
|
-
]
|
19
|
-
end
|
20
|
-
|
21
|
-
def format(str)
|
22
|
-
self.space ? add_space(str) : remove_space(str)
|
23
|
-
end
|
24
|
-
|
25
|
-
def add_space(str)
|
26
|
-
r0, r1 = add_space_rules
|
27
|
-
str = str.gsub(r0) { "#$1 #$2" }
|
28
|
-
if self.reverse
|
29
|
-
str = str.gsub(r1) { "#$1 #$2" }
|
30
|
-
end
|
31
|
-
str
|
32
|
-
end
|
33
|
-
|
34
|
-
def remove_space(str)
|
35
|
-
r0, r1 = remove_space_rules
|
36
|
-
str = str.gsub(r0) { "#$1 #$2" }
|
37
|
-
if self.reverse
|
38
|
-
str = str.gsub(r1) { "#$1 #$2" }
|
39
|
-
end
|
40
|
-
str
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
data/lib/auto-correct/string.rb
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
class AutoCorrect
|
2
|
-
module String
|
3
|
-
def auto_space!
|
4
|
-
ActiveSupport::Deprecation.warn("String.auto_space! is deprecated and will be removed in auto-corrrect 1.0, please use AutoCorrect.format instead.")
|
5
|
-
self.sub!(self, AutoCorrect.format(self))
|
6
|
-
end
|
7
|
-
|
8
|
-
def auto_correct!
|
9
|
-
ActiveSupport::Deprecation.warn("String.auto_correct! is deprecated and will be removed in auto-corrrect 1.0, please use AutoCorrect.format instead.")
|
10
|
-
self.sub!(self, AutoCorrect.format(self))
|
11
|
-
end
|
12
|
-
end
|
13
|
-
end
|
data/lib/auto-correct/version.rb
DELETED