auto-correct 0.1.0.pre0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +35 -36
- data/lib/auto-correct.rb +9 -38
- data/lib/auto-correct/base.rb +13 -0
- data/lib/auto-correct/format.rb +36 -0
- data/lib/auto-correct/html.rb +14 -0
- data/lib/auto-correct/strategery.rb +43 -0
- data/lib/auto-correct/string.rb +13 -0
- data/lib/auto-correct/version.rb +3 -0
- metadata +20 -15
- data/lib/auto-correct/dicts.rb +0 -103
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 89d9754d3ecd0a18d8ef8ee8f245f2ff0b217873ae7b09bb0e4759759a297878
|
4
|
+
data.tar.gz: f34f2c046802a275447e0f602442406aeafd7adee6e27fd6647405ac34e81499
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fba0aafa39062de04a459fadbdb23c3893b7e69dd09c343ec3e16ba41e5d776e3538475ecd6dd2335d9bd0de24bf15d58ad3c0490f87a97f2731fa169cc6dd01
|
7
|
+
data.tar.gz: 72c1c012a63f9ebfb8289b7ce2e630d79aa47085720b8a98816b0763318d24460b22633fc0d8ffc1b012864bf660d69f4b9647fb10390d1643ddb8cd94e0b0a8
|
data/README.md
CHANGED
@@ -1,62 +1,61 @@
|
|
1
1
|
# auto-correct
|
2
2
|
|
3
|
+
Automatically add spaces between Chinese and English words.
|
4
|
+
|
3
5
|
自动纠正中文英文混排时一些不够好的写法,纠正错误的名词大小写。
|
4
6
|
|
5
|
-
|
7
|
+
[![Gem Version](https://badge.fury.io/rb/auto-correct.svg)](https://rubygems.org/gems/auto-correct) [![Build
|
8
|
+
Status](https://api.travis-ci.org/huacnlee/auto-correct.svg?branch=master&.svg)](http://travis-ci.org/huacnlee/auto-correct) [![Code Climate](https://codeclimate.com/github/huacnlee/auto-correct/badges/gpa.svg)](https://codeclimate.com/github/huacnlee/auto-correct)
|
6
9
|
|
7
|
-
```
|
8
|
-
[经验之谈]转行做ruby程序员的8个月, mysql 经验
|
9
|
-
```
|
10
10
|
|
11
|
-
|
11
|
+
## Other implementations
|
12
12
|
|
13
|
-
|
14
|
-
[
|
15
|
-
```
|
13
|
+
- [auto-correct](https://github.com/huacnlee/auto-correct) - Ruby
|
14
|
+
- [go-auto-correct](https://github.com/huacnlee/go-auto-correct) - Go
|
16
15
|
|
17
|
-
|
18
|
-
Status](https://secure.travis-ci.org/huacnlee/auto-space.png?branch=master&.png)](http://travis-ci.org/huacnlee/auto-space)
|
16
|
+
## Features
|
19
17
|
|
20
|
-
|
18
|
+
- Automatically add spaces between Chinese and English words.
|
19
|
+
- HTML content support.
|
21
20
|
|
22
|
-
|
23
|
-
irb> require 'auto-correct'
|
24
|
-
true
|
21
|
+
## Usage
|
25
22
|
|
26
|
-
|
27
|
-
关于 SSH 连接的 Permission denied (publickey).
|
23
|
+
`AutoCorrect.format` method for plain text.
|
28
24
|
|
29
|
-
|
30
|
-
|
25
|
+
```ruby
|
26
|
+
AutoCorrect.format("关于ssh连接的Permission denied(publickey).")
|
27
|
+
# => "关于 ssh 连接的 Permission denied(publickey)."
|
31
28
|
|
32
|
-
|
33
|
-
|
29
|
+
AutoCorrect.format("怎样追踪一个repo的新feature 和进展呢?")
|
30
|
+
# => "怎样追踪一个 repo 的新 feature 和进展呢?"
|
34
31
|
|
35
|
-
|
36
|
-
|
37
|
-
```
|
32
|
+
AutoCorrect.format("vps上sessions不生效,但在本地的环境是ok的,why?")
|
33
|
+
# => "vps 上 sessions 不生效,但在本地的环境是 ok 的,why?"
|
38
34
|
|
39
|
-
|
35
|
+
AutoCorrect.format("bootstrap control-group对齐问题")
|
36
|
+
# => "bootstrap control-group 对齐问题"
|
37
|
+
```
|
40
38
|
|
41
|
-
|
39
|
+
`AutoCorrect.format_html` method for HTML content.
|
42
40
|
|
41
|
+
```ruby
|
42
|
+
AutoCorrect.format_html("<div><p>长桥LongBridge App下载</p><p>最新版本1.0</p></div>")
|
43
|
+
# => "<div><p>长桥 LongBridge App 下载</p><p>最新版本 1.0</p></div>"
|
43
44
|
```
|
44
|
-
$ rake benchmark
|
45
|
-
user system total real
|
46
|
-
100 times 0.000000 0.000000 0.000000 ( 0.002223)
|
47
|
-
1000 times 0.030000 0.000000 0.030000 ( 0.024711)
|
48
|
-
10000 times 0.230000 0.000000 0.230000 ( 0.240850)
|
49
|
-
```
|
50
45
|
|
51
|
-
##
|
46
|
+
## Benchmark
|
47
|
+
|
48
|
+
TODO
|
52
49
|
|
53
|
-
* 'Foo'的"Bar" -> 'Foo' 的 "Bar"
|
54
|
-
* 什么,时候 -> 什么, 时候 -> 什么,时候
|
55
50
|
|
56
|
-
##
|
51
|
+
## Use cases
|
57
52
|
|
58
53
|
* [Ruby China](http://ruby-china.org) - 目前整站的标题都做了自动转换处理。
|
59
54
|
|
60
|
-
##
|
55
|
+
## Links
|
61
56
|
|
62
57
|
* [Chinese Copywriting Guidelines](https://github.com/sparanoid/chinese-copywriting-guidelines)
|
58
|
+
|
59
|
+
## License
|
60
|
+
|
61
|
+
This project is released under the MIT license.
|
data/lib/auto-correct.rb
CHANGED
@@ -1,40 +1,11 @@
|
|
1
|
-
|
2
|
-
require "auto-correct/
|
1
|
+
require "auto-correct/strategery"
|
2
|
+
require "auto-correct/base"
|
3
|
+
require "auto-correct/format"
|
4
|
+
require "auto-correct/html"
|
5
|
+
require "auto-correct/string"
|
6
|
+
require "auto-correct/version"
|
3
7
|
|
4
|
-
class
|
5
|
-
def auto_space!
|
6
|
-
self.gsub! /((?![年月日号])\p{Han})([a-zA-Z0-9+$@#\[\(\/‘“])/u do
|
7
|
-
"#$1 #$2"
|
8
|
-
end
|
9
|
-
|
10
|
-
self.gsub! /([a-zA-Z0-9+$’”\]\)@#!\/]|[\d[年月日]]{2,})((?![年月日号])\p{Han})/u do
|
11
|
-
"#$1 #$2"
|
12
|
-
end
|
13
|
-
|
14
|
-
# Fix () [] near the English and number
|
15
|
-
self.gsub! /([a-zA-Z0-9]+)([\[\(‘“])/u do
|
16
|
-
"#$1 #$2"
|
17
|
-
end
|
18
|
-
|
19
|
-
self.gsub! /([\)\]’”])([a-zA-Z0-9]+)/u do
|
20
|
-
"#$1 #$2"
|
21
|
-
end
|
22
|
-
|
23
|
-
self
|
24
|
-
end
|
25
|
-
|
26
|
-
def auto_correct!
|
27
|
-
self.auto_space!
|
28
|
-
|
29
|
-
self.gsub! /([\d\p{Han}]|\s|^)([a-zA-Z\d\-\_\.]+)([\d\p{Han}]|\s|$)/u do
|
30
|
-
key = "#$2".downcase
|
31
|
-
if AutoCorrect::DICTS.has_key?(key)
|
32
|
-
["#$1",AutoCorrect::DICTS[key],"#$3"].join("")
|
33
|
-
else
|
34
|
-
"#$1#$2#$3"
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
self
|
39
|
-
end
|
8
|
+
class AutoCorrect
|
40
9
|
end
|
10
|
+
|
11
|
+
String.send :include, AutoCorrect::String
|
@@ -0,0 +1,13 @@
|
|
1
|
+
class AutoCorrect
  # Registry of every rule declared through AutoCorrect.rule, kept in
  # declaration order.
  @@strategies = []

  # Builds a Strategery from the two pattern fragments and appends it to the
  # registry.
  #
  # @param one [String] regexp source for the first character class
  # @param other [String] regexp source for the second character class
  # @param space [Boolean] forwarded to Strategery.new
  # @param reverse [Boolean] forwarded to Strategery.new
  # @return [Array<AutoCorrect::Strategery>] the registry after the append
  def self.rule(one, other, space: false, reverse: false)
    strategy = AutoCorrect::Strategery.new(one, other, space: space, reverse: reverse)
    @@strategies.push(strategy)
  end

  # @return [Array<AutoCorrect::Strategery>] all registered rules
  def self.strategies
    @@strategies
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
class AutoCorrect
  # rubocop:disable Style/StringLiterals

  # Han characters next to ASCII letters or digits (both orders).
  rule '\p{Han}', '[0-9a-zA-Z]', space: true, reverse: true

  # Han characters next to special symbols, brackets and quotes.
  rule '\p{Han}', '[\|+$@#]', space: true, reverse: true
  rule '\p{Han}', '[\[\(‘“]', space: true
  rule '[’”\]\)!%]', '\p{Han}', space: true
  rule '[”\]\)!]', '[a-zA-Z0-9]+', space: true

  # Punctuation rules: registered without `space: true`, so they go through
  # Strategery#remove_space.
  rule '[\w\p{Han}]', '[,。!?:;」》】”’]', reverse: true
  rule '[‘“【「《]', '[\w\p{Han}]', reverse: true

  class << self
    # A full date (digits 年 digits 月 digits 日/号) together with any
    # whitespace inside or directly around it.
    FULLDATE_RE = /[\s]{0,}\d+[\s]{0,}年[\s]{0,}\d+[\s]{0,}月[\s]{0,}\d+[\s]{0,}[日号][\s]{0,}/u

    # Runs the text through every registered strategy in order, then strips
    # the whitespace from full dates.
    #
    # @param str [String] text to format
    # @return [String] formatted text
    def format(str)
      spaced = strategies.reduce(str) { |text, strategy| strategy.format(text) }
      remove_full_date_spacing(spaced)
    end

    private

    # Deletes all whitespace inside each FULLDATE_RE match, including the
    # whitespace the pattern captures before and after the date.
    def remove_full_date_spacing(str)
      str.gsub(FULLDATE_RE) { |date| date.gsub(/\s+/, "") }
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
|
3
|
+
class AutoCorrect
  # Applies AutoCorrect.format to every text node of an HTML string, leaving
  # element and attribute nodes untouched.
  #
  # @param html [String] HTML markup
  # @return [String] inner HTML of the parsed document's <body>
  def self.format_html(html)
    document = Nokogiri::HTML(html)

    document.traverse do |node|
      # Only text nodes carry content to format; everything else is skipped.
      if node.node_type == Nokogiri::XML::Node::TEXT_NODE
        node.content = AutoCorrect.format(node.content)
      end
    end

    document.css("body").inner_html
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
class AutoCorrect
  # A single spacing rule between two character classes.
  #
  # `one` and `other` are regexp source fragments that get interpolated into
  # capture groups. Depending on `space`, the rule either inserts a space
  # between adjacent matches or removes the whitespace that separates them.
  class Strategery
    attr_reader :space, :reverse
    attr_reader :add_space_rules, :remove_space_rules

    # @param one [String] regexp source for the left-hand character class
    # @param other [String] regexp source for the right-hand character class
    # @param space [Boolean] true to insert a space, false to remove whitespace
    # @param reverse [Boolean] when true the rule is also applied with
    #   `other` on the left and `one` on the right
    def initialize(one, other, space: false, reverse: false)
      @space = space
      @reverse = reverse

      @add_space_rules = [
        /(#{one})(#{other})/u,
        /(#{other})(#{one})/u
      ]

      @remove_space_rules = [
        /(#{one})\s+(#{other})/u,
        /(#{other})\s+(#{one})/u
      ]
    end

    # Applies the rule to +str+ and returns a new string.
    #
    # @param str [String]
    # @return [String]
    def format(str)
      space ? add_space(str) : remove_space(str)
    end

    # Inserts one space between directly adjacent matches of the two classes.
    #
    # @param str [String]
    # @return [String]
    def add_space(str)
      substitute(str, add_space_rules, '\1 \2')
    end

    # Removes the whitespace separating matches of the two classes.
    # The replacement joins the two captures directly; the earlier
    # "#$1 #$2" form put a space back and so never removed anything.
    #
    # @param str [String]
    # @return [String]
    def remove_space(str)
      substitute(str, remove_space_rules, '\1\2')
    end

    private

    # Runs the forward pattern, and the reversed one when `reverse` is set.
    def substitute(str, rules, replacement)
      forward, backward = rules
      result = str.gsub(forward, replacement)
      result = result.gsub(backward, replacement) if reverse
      result
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
class AutoCorrect
  # Deprecated in-place helpers mixed into ::String. Both methods overwrite
  # the receiver with the result of AutoCorrect.format.
  module String
    # Formats the receiver in place.
    #
    # @deprecated Use AutoCorrect.format instead.
    # @return [String] the receiver, after modification
    def auto_space!
      AutoCorrect::String.deprecation_warning("String.auto_space! is deprecated and will be removed in auto-correct 1.0, please use AutoCorrect.format instead.")
      # `replace` copies the formatted text verbatim. `sub!(self, ...)` would
      # interpret backslash sequences in the replacement string.
      replace(AutoCorrect.format(self))
    end

    # Formats the receiver in place.
    #
    # @deprecated Use AutoCorrect.format instead.
    # @return [String] the receiver, after modification
    def auto_correct!
      AutoCorrect::String.deprecation_warning("String.auto_correct! is deprecated and will be removed in auto-correct 1.0, please use AutoCorrect.format instead.")
      replace(AutoCorrect.format(self))
    end

    # Emits a deprecation message through ActiveSupport when it is loaded and
    # still exposes `Deprecation.warn`, otherwise through Kernel.warn, so the
    # helpers also work without ActiveSupport.
    #
    # Defined on the module itself, so it is not mixed into ::String.
    def self.deprecation_warning(message)
      if defined?(::ActiveSupport::Deprecation) && ::ActiveSupport::Deprecation.respond_to?(:warn)
        ::ActiveSupport::Deprecation.warn(message)
      else
        Kernel.warn("DEPRECATION WARNING: #{message}")
      end
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: auto-correct
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luikore
|
@@ -9,23 +9,24 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2020-01-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
15
|
+
name: nokogiri
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
-
- - "
|
18
|
+
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version:
|
20
|
+
version: '1.4'
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
|
-
- - "
|
25
|
+
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version:
|
28
|
-
description:
|
27
|
+
version: '1.4'
|
28
|
+
description: Automatically add whitespace between Chinese and half-width characters
|
29
|
+
(alphabetical letters, numerical digits and symbols).
|
29
30
|
email:
|
30
31
|
- usurffx@gmail.com
|
31
32
|
- huacnlee@gmail.com
|
@@ -35,7 +36,12 @@ extra_rdoc_files: []
|
|
35
36
|
files:
|
36
37
|
- README.md
|
37
38
|
- lib/auto-correct.rb
|
38
|
-
- lib/auto-correct/
|
39
|
+
- lib/auto-correct/base.rb
|
40
|
+
- lib/auto-correct/format.rb
|
41
|
+
- lib/auto-correct/html.rb
|
42
|
+
- lib/auto-correct/strategery.rb
|
43
|
+
- lib/auto-correct/string.rb
|
44
|
+
- lib/auto-correct/version.rb
|
39
45
|
homepage: https://github.com/huacnlee/auto-correct
|
40
46
|
licenses: []
|
41
47
|
metadata: {}
|
@@ -50,14 +56,13 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
50
56
|
version: '0'
|
51
57
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
52
58
|
requirements:
|
53
|
-
- - "
|
59
|
+
- - ">="
|
54
60
|
- !ruby/object:Gem::Version
|
55
|
-
version:
|
61
|
+
version: '0'
|
56
62
|
requirements: []
|
57
|
-
|
58
|
-
rubygems_version: 2.2.2
|
63
|
+
rubygems_version: 3.0.3
|
59
64
|
signing_key:
|
60
65
|
specification_version: 4
|
61
|
-
summary:
|
66
|
+
summary: Automatically add whitespace between Chinese and half-width characters
|
67
|
+
(alphabetical letters, numerical digits and symbols).
|
62
68
|
test_files: []
|
63
|
-
has_rdoc:
|
data/lib/auto-correct/dicts.rb
DELETED
@@ -1,103 +0,0 @@
|
|
1
|
-
module AutoCorrect
  # Maps a lower-cased term to its canonical spelling. Keys must be
  # lower-case because lookups downcase the matched word first.
  #
  # The table and its strings are frozen so callers cannot mutate shared state.
  DICTS = {
    # Ruby
    "ruby" => "Ruby",
    "rails" => "Rails",
    "rubygems" => "RubyGems",
    "ror" => "Ruby on Rails",
    "rubyconf" => "RubyConf",
    "railsconf" => "RailsConf",
    "rubytuesday" => "Ruby Tuesday",
    "jruby" => "JRuby",
    "mruby" => "mRuby",
    "rvm" => "RVM",
    "rbenv" => "rbenv",
    "yard" => "YARD",
    "rdoc" => "RDoc",
    "rspec" => "RSpec",
    "minitest" => "MiniTest",
    "coffeescript" => "CoffeeScript",
    "scss" => "SCSS",
    "sass" => "Sass",
    "sidekiq" => "Sidekiq",
    "railscasts" => "RailsCasts",
    "execjs" => "ExecJS",

    # Python

    # Node.js
    "nodejs" => "Node.js",

    # Go

    # Cocoa
    "reactivecocoa" => "ReactiveCocoa",

    # Programming
    "ssh" => "SSH",
    "css" => "CSS",
    "html" => "HTML",
    "javascript" => "JavaScript",
    "js" => "JS",
    "png" => "PNG",
    "dsl" => "DSL",
    "tdd" => "TDD",
    "bdd" => "BDD",

    # Sites
    "github" => "GitHub",
    "gist" => "Gist",
    "ruby_china" => "Ruby China",
    "ruby-china" => "Ruby China",
    "rubychina" => "Ruby China",
    "v2ex" => "V2EX",
    "heroku" => "Heroku",
    "stackoverflow" => "Stack Overflow",
    "stackexchange" => "StackExchange",

    # Databases
    "mysql" => "MySQL",
    "postgresql" => "PostgreSQL",
    "sqlite" => "SQLite",
    "mongodb" => "MongoDB",
    "rethinkdb" => "RethinkDB",
    "elasticsearch" => "Elasticsearch",
    "sphinx" => "Sphinx",

    # OpenSource Projects
    "gitlab" => "GitLab",
    "gitlabci" => "GitLab CI",
    # The misspelled key is kept so existing input still matches; both
    # spellings now resolve to the project's real name.
    "fontawsome" => "Font Awesome",
    "fontawesome" => "Font Awesome",
    "bootstrap" => "Bootstrap",
    "less" => "Less",
    "jquery" => "jQuery",
    "requirejs" => "RequireJS",
    "underscore" => "Underscore",
    "backbone" => "Backbone",
    "seajs" => "SeaJS",
    "imagemagick" => "ImageMagick",

    # Tools
    "vim" => "VIM",
    "emacs" => "Emacs",
    "textmate" => "TextMate",
    "sublime" => "Sublime",
    "rubymine" => "RubyMine",
    "sequelpro" => "Sequel Pro",
    "virtualbox" => "VirtualBox",
    "safari" => "Safari",
    "chrome" => "Chrome",
    "ie" => "IE",

    # Misc
    "ios" => "iOS",
    "iphone" => "iPhone",
    "android" => "Android",
    "osx" => "OS X",
    "mac" => "Mac",
    "api" => "API",
    "wi-fi" => "Wi-Fi",
    "wifi" => "Wi-Fi"
  }.each_value(&:freeze).freeze
end
|