auto-correct 0.1.0.pre0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +35 -36
- data/lib/auto-correct.rb +9 -38
- data/lib/auto-correct/base.rb +13 -0
- data/lib/auto-correct/format.rb +36 -0
- data/lib/auto-correct/html.rb +14 -0
- data/lib/auto-correct/strategery.rb +43 -0
- data/lib/auto-correct/string.rb +13 -0
- data/lib/auto-correct/version.rb +3 -0
- metadata +20 -15
- data/lib/auto-correct/dicts.rb +0 -103
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 89d9754d3ecd0a18d8ef8ee8f245f2ff0b217873ae7b09bb0e4759759a297878
|
4
|
+
data.tar.gz: f34f2c046802a275447e0f602442406aeafd7adee6e27fd6647405ac34e81499
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fba0aafa39062de04a459fadbdb23c3893b7e69dd09c343ec3e16ba41e5d776e3538475ecd6dd2335d9bd0de24bf15d58ad3c0490f87a97f2731fa169cc6dd01
|
7
|
+
data.tar.gz: 72c1c012a63f9ebfb8289b7ce2e630d79aa47085720b8a98816b0763318d24460b22633fc0d8ffc1b012864bf660d69f4b9647fb10390d1643ddb8cd94e0b0a8
|
data/README.md
CHANGED
@@ -1,62 +1,61 @@
|
|
1
1
|
# auto-correct
|
2
2
|
|
3
|
+
Automatically add spaces between Chinese and English words.
|
4
|
+
|
3
5
|
自动纠正中文英文混排时一些不够好的写法,纠正错误的名词大小写。
|
4
6
|
|
5
|
-
|
7
|
+
[Gem Version](https://rubygems.org/gems/auto-correct) [Build Status](http://travis-ci.org/huacnlee/auto-correct) [Code Climate](https://codeclimate.com/github/huacnlee/auto-correct)
|
6
9
|
|
7
|
-
```
|
8
|
-
[经验之谈]转行做ruby程序员的8个月, mysql 经验
|
9
|
-
```
|
10
10
|
|
11
|
-
|
11
|
+
## Other implementations
|
12
12
|
|
13
|
-
|
14
|
-
[
|
15
|
-
```
|
13
|
+
- [auto-correct](https://github.com/huacnlee/auto-correct) - Ruby
|
14
|
+
- [go-auto-correct](https://github.com/huacnlee/go-auto-correct) - Go
|
16
15
|
|
17
|
-
|
18
|
-
Status](https://secure.travis-ci.org/huacnlee/auto-space.png?branch=master&.png)](http://travis-ci.org/huacnlee/auto-space)
|
16
|
+
## Features
|
19
17
|
|
20
|
-
|
18
|
+
- Automatically add spaces between Chinese and English words.
|
19
|
+
- HTML content support.
|
21
20
|
|
22
|
-
|
23
|
-
irb> require 'auto-correct'
|
24
|
-
true
|
21
|
+
## Usage
|
25
22
|
|
26
|
-
|
27
|
-
关于 SSH 连接的 Permission denied (publickey).
|
23
|
+
`AutoCorrect.format` method for plain text.
|
28
24
|
|
29
|
-
|
30
|
-
|
25
|
+
```ruby
|
26
|
+
AutoCorrect.format("关于ssh连接的Permission denied(publickey).")
|
27
|
+
# => "关于 SSH 连接的 Permission denied (publickey)."
|
31
28
|
|
32
|
-
|
33
|
-
|
29
|
+
AutoCorrect.format("怎样追踪一个repo的新feature 和进展呢?")
|
30
|
+
# => "怎样追踪一个 repo 的新 feature 和进展呢?"
|
34
31
|
|
35
|
-
|
36
|
-
|
37
|
-
```
|
32
|
+
AutoCorrect.format("vps上sessions不生效,但在本地的环境是ok的,why?")
|
33
|
+
# => "VPS 上 sessions 不生效,但在本地的环境是 OK 的,why?"
|
38
34
|
|
39
|
-
|
35
|
+
AutoCorrect.format("bootstrap control-group对齐问题")
|
36
|
+
# => "Bootstrap control-group 对齐问题"
|
37
|
+
```
|
40
38
|
|
41
|
-
|
39
|
+
`AutoCorrect.format_html` method for HTML content.
|
42
40
|
|
41
|
+
```ruby
|
42
|
+
AutoCorrect.format_html("<div><p>长桥LongBridge App下载</p><p>最新版本1.0</p></div>")
|
43
|
+
# => "<div><p>长桥 LongBridge App 下载</p><p>最新版本 1.0</p></div>"
|
43
44
|
```
|
44
|
-
$ rake benchmark
|
45
|
-
user system total real
|
46
|
-
100 times 0.000000 0.000000 0.000000 ( 0.002223)
|
47
|
-
1000 times 0.030000 0.000000 0.030000 ( 0.024711)
|
48
|
-
10000 times 0.230000 0.000000 0.230000 ( 0.240850)
|
49
|
-
```
|
50
45
|
|
51
|
-
##
|
46
|
+
## Benchmark
|
47
|
+
|
48
|
+
TODO
|
52
49
|
|
53
|
-
* 'Foo'的"Bar" -> 'Foo' 的 "Bar"
|
54
|
-
* 什么,时候 -> 什么, 时候 -> 什么,时候
|
55
50
|
|
56
|
-
##
|
51
|
+
## Use cases
|
57
52
|
|
58
53
|
* [Ruby China](http://ruby-china.org) - 目前整站的标题都做了自动转换处理。
|
59
54
|
|
60
|
-
##
|
55
|
+
## Links
|
61
56
|
|
62
57
|
* [Chinese Copywriting Guidelines](https://github.com/sparanoid/chinese-copywriting-guidelines)
|
58
|
+
|
59
|
+
## License
|
60
|
+
|
61
|
+
This project is released under the MIT license.
|
data/lib/auto-correct.rb
CHANGED
@@ -1,40 +1,11 @@
|
|
1
|
-
|
2
|
-
require "auto-correct/
|
1
|
+
require "auto-correct/strategery"
|
2
|
+
require "auto-correct/base"
|
3
|
+
require "auto-correct/format"
|
4
|
+
require "auto-correct/html"
|
5
|
+
require "auto-correct/string"
|
6
|
+
require "auto-correct/version"
|
3
7
|
|
4
|
-
class
|
5
|
-
def auto_space!
|
6
|
-
self.gsub! /((?![年月日号])\p{Han})([a-zA-Z0-9+$@#\[\(\/‘“])/u do
|
7
|
-
"#$1 #$2"
|
8
|
-
end
|
9
|
-
|
10
|
-
self.gsub! /([a-zA-Z0-9+$’”\]\)@#!\/]|[\d[年月日]]{2,})((?![年月日号])\p{Han})/u do
|
11
|
-
"#$1 #$2"
|
12
|
-
end
|
13
|
-
|
14
|
-
# Fix () [] near the English and number
|
15
|
-
self.gsub! /([a-zA-Z0-9]+)([\[\(‘“])/u do
|
16
|
-
"#$1 #$2"
|
17
|
-
end
|
18
|
-
|
19
|
-
self.gsub! /([\)\]’”])([a-zA-Z0-9]+)/u do
|
20
|
-
"#$1 #$2"
|
21
|
-
end
|
22
|
-
|
23
|
-
self
|
24
|
-
end
|
25
|
-
|
26
|
-
def auto_correct!
|
27
|
-
self.auto_space!
|
28
|
-
|
29
|
-
self.gsub! /([\d\p{Han}]|\s|^)([a-zA-Z\d\-\_\.]+)([\d\p{Han}]|\s|$)/u do
|
30
|
-
key = "#$2".downcase
|
31
|
-
if AutoCorrect::DICTS.has_key?(key)
|
32
|
-
["#$1",AutoCorrect::DICTS[key],"#$3"].join("")
|
33
|
-
else
|
34
|
-
"#$1#$2#$3"
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
self
|
39
|
-
end
|
8
|
+
class AutoCorrect
|
40
9
|
end
|
10
|
+
|
11
|
+
String.send :include, AutoCorrect::String
|
@@ -0,0 +1,13 @@
|
|
1
|
+
class AutoCorrect
|
2
|
+
@@strategies = []
|
3
|
+
|
4
|
+
class << self
|
5
|
+
def rule(one, other, space: false, reverse: false)
|
6
|
+
@@strategies << AutoCorrect::Strategery.new(one, other, space: space, reverse: reverse)
|
7
|
+
end
|
8
|
+
|
9
|
+
def strategies
|
10
|
+
@@strategies
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
class AutoCorrect
|
2
|
+
# rubocop:disable Style/StringLiterals
|
3
|
+
# EnglishLetter
|
4
|
+
rule '\p{Han}', '[0-9a-zA-Z]', space: true, reverse: true
|
5
|
+
|
6
|
+
# SpecialSymbol
|
7
|
+
rule '\p{Han}', '[\|+$@#]', space: true, reverse: true
|
8
|
+
rule '\p{Han}', '[\[\(‘“]', space: true
|
9
|
+
rule '[’”\]\)!%]', '\p{Han}', space: true
|
10
|
+
rule '[”\]\)!]', '[a-zA-Z0-9]+', space: true
|
11
|
+
|
12
|
+
# FullwidthPunctuation
|
13
|
+
rule '[\w\p{Han}]', '[,。!?:;」》】”’]', reverse: true
|
14
|
+
rule '[‘“【「《]', '[\w\p{Han}]', reverse: true
|
15
|
+
|
16
|
+
class << self
|
17
|
+
FULLDATE_RE = /[\s]{0,}\d+[\s]{0,}年[\s]{0,}\d+[\s]{0,}月[\s]{0,}\d+[\s]{0,}[日号][\s]{0,}/u
|
18
|
+
|
19
|
+
def format(str)
|
20
|
+
out = str
|
21
|
+
self.strategies.each do |s|
|
22
|
+
out = s.format(out)
|
23
|
+
end
|
24
|
+
out = remove_full_date_spacing(out)
|
25
|
+
out
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
def remove_full_date_spacing(str)
|
31
|
+
str.gsub(FULLDATE_RE) do |m|
|
32
|
+
m.gsub(/\s+/, "")
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
|
3
|
+
class AutoCorrect
|
4
|
+
class << self
|
5
|
+
def format_html(html)
|
6
|
+
doc = Nokogiri::HTML(html)
|
7
|
+
doc.traverse do |node|
|
8
|
+
next unless node.node_type == Nokogiri::XML::Node::TEXT_NODE
|
9
|
+
node.content = AutoCorrect.format(node.content)
|
10
|
+
end
|
11
|
+
doc.css("body").inner_html
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
class AutoCorrect
|
2
|
+
class Strategery
|
3
|
+
attr_reader :space, :reverse
|
4
|
+
attr_reader :add_space_rules, :remove_space_rules
|
5
|
+
|
6
|
+
def initialize(one, other, space: false, reverse: false)
|
7
|
+
@space = space
|
8
|
+
@reverse = reverse
|
9
|
+
|
10
|
+
@add_space_rules = [
|
11
|
+
/(#{one})(#{other})/u,
|
12
|
+
/(#{other})(#{one})/u
|
13
|
+
]
|
14
|
+
|
15
|
+
@remove_space_rules = [
|
16
|
+
/(#{one})\s+(#{other})/u,
|
17
|
+
/(#{other})\s+(#{one})/u
|
18
|
+
]
|
19
|
+
end
|
20
|
+
|
21
|
+
def format(str)
|
22
|
+
self.space ? add_space(str) : remove_space(str)
|
23
|
+
end
|
24
|
+
|
25
|
+
def add_space(str)
|
26
|
+
r0, r1 = add_space_rules
|
27
|
+
str = str.gsub(r0) { "#$1 #$2" }
|
28
|
+
if self.reverse
|
29
|
+
str = str.gsub(r1) { "#$1 #$2" }
|
30
|
+
end
|
31
|
+
str
|
32
|
+
end
|
33
|
+
|
34
|
+
def remove_space(str)
|
35
|
+
r0, r1 = remove_space_rules
|
36
|
+
str = str.gsub(r0) { "#$1 #$2" }
|
37
|
+
if self.reverse
|
38
|
+
str = str.gsub(r1) { "#$1 #$2" }
|
39
|
+
end
|
40
|
+
str
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
class AutoCorrect
|
2
|
+
module String
|
3
|
+
def auto_space!
|
4
|
+
ActiveSupport::Deprecation.warn("String.auto_space! is deprecated and will be removed in auto-corrrect 1.0, please use AutoCorrect.format instead.")
|
5
|
+
self.sub!(self, AutoCorrect.format(self))
|
6
|
+
end
|
7
|
+
|
8
|
+
def auto_correct!
|
9
|
+
ActiveSupport::Deprecation.warn("String.auto_correct! is deprecated and will be removed in auto-corrrect 1.0, please use AutoCorrect.format instead.")
|
10
|
+
self.sub!(self, AutoCorrect.format(self))
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: auto-correct
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luikore
|
@@ -9,23 +9,24 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2020-01-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
15
|
+
name: nokogiri
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
-
- - "
|
18
|
+
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version:
|
20
|
+
version: '1.4'
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
|
-
- - "
|
25
|
+
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version:
|
28
|
-
description:
|
27
|
+
version: '1.4'
|
28
|
+
description: Automatically add whitespace between Chinese and half-width characters
|
29
|
+
(alphabetical letters, numerical digits and symbols).
|
29
30
|
email:
|
30
31
|
- usurffx@gmail.com
|
31
32
|
- huacnlee@gmail.com
|
@@ -35,7 +36,12 @@ extra_rdoc_files: []
|
|
35
36
|
files:
|
36
37
|
- README.md
|
37
38
|
- lib/auto-correct.rb
|
38
|
-
- lib/auto-correct/
|
39
|
+
- lib/auto-correct/base.rb
|
40
|
+
- lib/auto-correct/format.rb
|
41
|
+
- lib/auto-correct/html.rb
|
42
|
+
- lib/auto-correct/strategery.rb
|
43
|
+
- lib/auto-correct/string.rb
|
44
|
+
- lib/auto-correct/version.rb
|
39
45
|
homepage: https://github.com/huacnlee/auto-correct
|
40
46
|
licenses: []
|
41
47
|
metadata: {}
|
@@ -50,14 +56,13 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
50
56
|
version: '0'
|
51
57
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
52
58
|
requirements:
|
53
|
-
- - "
|
59
|
+
- - ">="
|
54
60
|
- !ruby/object:Gem::Version
|
55
|
-
version:
|
61
|
+
version: '0'
|
56
62
|
requirements: []
|
57
|
-
|
58
|
-
rubygems_version: 2.2.2
|
63
|
+
rubygems_version: 3.0.3
|
59
64
|
signing_key:
|
60
65
|
specification_version: 4
|
61
|
-
summary:
|
66
|
+
summary: Automatically add whitespace between Chinese and half-width characters
|
67
|
+
(alphabetical letters, numerical digits and symbols).
|
62
68
|
test_files: []
|
63
|
-
has_rdoc:
|
data/lib/auto-correct/dicts.rb
DELETED
@@ -1,103 +0,0 @@
|
|
1
|
-
module AutoCorrect
|
2
|
-
DICTS = {
|
3
|
-
# Ruby
|
4
|
-
"ruby" => "Ruby",
|
5
|
-
"rails" => "Rails",
|
6
|
-
"rubygems" => "RubyGems",
|
7
|
-
"ror" => "Ruby on Rails",
|
8
|
-
"rubyconf" => "RubyConf",
|
9
|
-
"railsconf" => "RailsConf",
|
10
|
-
"rubytuesday" => "Ruby Tuesday",
|
11
|
-
"jruby" => "JRuby",
|
12
|
-
"mruby" => "mRuby",
|
13
|
-
"rvm" => "RVM",
|
14
|
-
"rbenv" => "rbenv",
|
15
|
-
"yard" => "YARD",
|
16
|
-
"rdoc" => "RDoc",
|
17
|
-
"rspec" => "RSpec",
|
18
|
-
"minitest" => "MiniTest",
|
19
|
-
"coffeescript" => "CoffeeScript",
|
20
|
-
"scss" => "SCSS",
|
21
|
-
"sass" => "Sass",
|
22
|
-
"sidekiq" => "Sidekiq",
|
23
|
-
"railscasts" => "RailsCasts",
|
24
|
-
"execjs" => "ExecJS",
|
25
|
-
|
26
|
-
# Python
|
27
|
-
|
28
|
-
# Node.js
|
29
|
-
"nodejs" => "Node.js",
|
30
|
-
|
31
|
-
# Go
|
32
|
-
|
33
|
-
# Cocoa
|
34
|
-
"reactivecocoa" => "ReactiveCocoa",
|
35
|
-
|
36
|
-
# Programming
|
37
|
-
"ssh" => "SSH",
|
38
|
-
"css" => "CSS",
|
39
|
-
"html" => "HTML",
|
40
|
-
"javascript" => "JavaScript",
|
41
|
-
"js" => "JS",
|
42
|
-
"png" => "PNG",
|
43
|
-
"dsl" => "DSL",
|
44
|
-
"tdd" => "TDD",
|
45
|
-
"bdd" => "BDD",
|
46
|
-
|
47
|
-
# Sites
|
48
|
-
"github" => "GitHub",
|
49
|
-
"gist" => "Gist",
|
50
|
-
"ruby_china" => "Ruby China",
|
51
|
-
"ruby-china" => "Ruby China",
|
52
|
-
"rubychina" => "Ruby China",
|
53
|
-
"v2ex" => "V2EX",
|
54
|
-
"heroku" => "Heroku",
|
55
|
-
"stackoverflow" => "Stack Overflow",
|
56
|
-
"stackexchange" => "StackExchange",
|
57
|
-
|
58
|
-
|
59
|
-
# Databases
|
60
|
-
"mysql" => "MySQL",
|
61
|
-
"postgresql" => "PostgreSQL",
|
62
|
-
"sqlite" => "SQLite",
|
63
|
-
"mongodb" => "MongoDB",
|
64
|
-
"rethinkdb" => "RethinkDB",
|
65
|
-
"elasticsearch" => "Elasticsearch",
|
66
|
-
"sphinx" => "Sphinx",
|
67
|
-
|
68
|
-
# OpenSource Projects
|
69
|
-
"gitlab" => "GitLab",
|
70
|
-
"gitlabci" => "GitLab CI",
|
71
|
-
"fontawsome" => "FontAwsome",
|
72
|
-
"bootstrap" => "Bootstrap",
|
73
|
-
"less" => "Less",
|
74
|
-
"jquery" => "jQuery",
|
75
|
-
"requirejs" => "RequireJS",
|
76
|
-
"underscore" => "Underscore",
|
77
|
-
"backbone" => "Backbone",
|
78
|
-
"seajs" => "SeaJS",
|
79
|
-
"imagemagick" => "ImageMagick",
|
80
|
-
|
81
|
-
# Tools
|
82
|
-
"vim" => "VIM",
|
83
|
-
"emacs" => "Emacs",
|
84
|
-
"textmate" => "TextMate",
|
85
|
-
"sublime" => "Sublime",
|
86
|
-
"rubymine" => "RubyMine",
|
87
|
-
"sequelpro" => "Sequel Pro",
|
88
|
-
"virtualbox" => "VirtualBox",
|
89
|
-
"safari" => "Safari",
|
90
|
-
"chrome" => "Chrome",
|
91
|
-
"ie" => "IE",
|
92
|
-
|
93
|
-
# Misc
|
94
|
-
"ios" => "iOS",
|
95
|
-
"iphone" => "iPhone",
|
96
|
-
"android" => "Android",
|
97
|
-
"osx" => "OS X",
|
98
|
-
"mac" => "Mac",
|
99
|
-
"api" => "API",
|
100
|
-
"wi-fi" => "Wi-Fi",
|
101
|
-
"wifi" => "Wi-Fi"
|
102
|
-
}
|
103
|
-
end
|