virastar 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +25 -0
- data/LICENSE +21 -0
- data/README.md +82 -0
- data/Rakefile +2 -0
- data/lib/virastar/version.rb +3 -0
- data/lib/virastar.rb +128 -0
- data/spec/spec_helper.rb +10 -0
- data/spec/virastar_spec.rb +196 -0
- data/virastar.gemspec +23 -0
- metadata +111 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
virastar (0.0.1)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: http://rubygems.org/
|
8
|
+
specs:
|
9
|
+
diff-lcs (1.1.2)
|
10
|
+
rspec (2.1.0)
|
11
|
+
rspec-core (~> 2.1.0)
|
12
|
+
rspec-expectations (~> 2.1.0)
|
13
|
+
rspec-mocks (~> 2.1.0)
|
14
|
+
rspec-core (2.1.0)
|
15
|
+
rspec-expectations (2.1.0)
|
16
|
+
diff-lcs (~> 1.1.2)
|
17
|
+
rspec-mocks (2.1.0)
|
18
|
+
|
19
|
+
PLATFORMS
|
20
|
+
ruby
|
21
|
+
|
22
|
+
DEPENDENCIES
|
23
|
+
bundler (~> 1.0.0)
|
24
|
+
rspec (~> 2.1.0)
|
25
|
+
virastar!
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright (c) 2011 Allen A. Bargi <http://github.com/aziz>
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person ob-
|
4
|
+
taining a copy of this software and associated documentation
|
5
|
+
files (the "Software"), to deal in the Software without restric-
|
6
|
+
tion, including without limitation the rights to use, copy, modi-
|
7
|
+
fy, merge, publish, distribute, sublicense, and/or sell copies of
|
8
|
+
the Software, and to permit persons to whom the Software is fur-
|
9
|
+
nished to do so, subject to the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
16
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONIN-
|
17
|
+
FRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
19
|
+
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
-----
|
2
|
+
#ویراستار
|
3
|
+
نوشتههای فارسی شما را ویرایش میکند
|
4
|
+
|
5
|
+
-----
|
6
|
+
Virastar (in Persian: ویراستار)
|
7
|
+
|
8
|
+
|
9
|
+
## Specifications
|
10
|
+
|
11
|
+
###Virastar
|
12
|
+
* should add persian_cleanup method to String class
|
13
|
+
* should replace Arabic kaf with its Persian equivalent
|
14
|
+
* should replace Arabic Yeh with its Persian equivalent
|
15
|
+
* should replace Arabic numbers with their Persian equivalent
|
16
|
+
* should replace English numbers with their Persian equivalent
|
17
|
+
* should replace English comma and semicolon with their Persian equivalent
|
18
|
+
* should correct :;,.?! spacing (one space after and no space before)
|
19
|
+
* should replace English quotes with their Persian equivalent
|
20
|
+
* should replace three dots with ellipsis
|
21
|
+
* should convert ه ی to هٔ
|
22
|
+
* should replace double dash to ndash and triple dash to mdash
|
23
|
+
* should replace more than one space with just a single one
|
24
|
+
* should remove unnecessary zwnj chars that are succeeded/preceded by a space
|
25
|
+
* should fix spacing for () [] {} “” «» (one space outside, no space inside)
|
26
|
+
* should replace English percent sign to its Persian equivalent
|
27
|
+
* should replace more than one line break with just one
|
28
|
+
* should not replace line breaks
|
29
|
+
* should put zwnj between word and prefix/suffix (ha haye* tar* tarin mi* nemi*)
|
30
|
+
* should not replace English numbers in English phrases
|
31
|
+
|
32
|
+
#### aggressive editing
|
33
|
+
* should replace more than one ! or ? mark with just one
|
34
|
+
* should remove all kashidas
|
35
|
+
|
36
|
+
-----
|
37
|
+
## Install
|
38
|
+
gem install virastar
|
39
|
+
|
40
|
+
## Usage
|
41
|
+
"فارسي را كمی درست تر می نويسيم".persian_cleanup # => "فارسی را کمی درستتر مینویسیم"
|
42
|
+
|
43
|
+
Virastar comes with a list of flags that control its behavior. All flags are turned on by default, but you can
|
44
|
+
turn them off by passing an options hash to the `persian_cleanup` method
|
45
|
+
|
46
|
+
"سلام 123".persian_cleanup(:fix_english_numbers => false) # => "سلام 123"
|
47
|
+
|
48
|
+
Here is the list of all flags:
|
49
|
+
|
50
|
+
* `fix_dashes`
|
51
|
+
* `fix_three_dots`
|
52
|
+
* `fix_english_quotes`
|
53
|
+
* `fix_hamzeh`
|
54
|
+
* `cleanup_zwnj`
|
55
|
+
* `fix_spacing_for_braces_and_quotes`
|
56
|
+
* `fix_arabic_numbers`
|
57
|
+
* `fix_english_numbers`
|
58
|
+
* `fix_misc_non_persian_chars`
|
59
|
+
* `fix_perfix_spacing`
|
60
|
+
* `fix_suffix_spacing`
|
61
|
+
* `aggresive`
|
62
|
+
* `cleanup_kashidas`
|
63
|
+
* `cleanup_extra_marks`
|
64
|
+
* `cleanup_spacing`
|
65
|
+
* `cleanup_begin_and_end`
|
66
|
+
|
67
|
+
## Acknowledgment
|
68
|
+
Virastar is highly inspired by [Virasbaz](http://virasbaz.persianlanguage.ir).
|
69
|
+
|
70
|
+
## Note on Patches/Pull Requests
|
71
|
+
|
72
|
+
* Fork the project.
|
73
|
+
* Make your feature addition or bug fix.
|
74
|
+
* Add tests for it. This is important so I don't break it in a
|
75
|
+
future version unintentionally.
|
76
|
+
* Commit, do not mess with rakefile, version, or history.
|
77
|
+
(If you want to have your own version, that is fine, but bump the version in a separate commit that I can ignore when I pull.)
|
78
|
+
* Send me a pull request. Bonus points for topic branches.
|
79
|
+
|
80
|
+
## Copyright
|
81
|
+
|
82
|
+
Copyright (c) 2011 Allen A. Bargi. See LICENSE for details.
|
data/Rakefile
ADDED
data/lib/virastar.rb
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
if RUBY_VERSION.to_f < 1.9
|
3
|
+
require 'jcode'
|
4
|
+
$KCODE = 'u'
|
5
|
+
end
|
6
|
+
|
7
|
+
module Virastar

  # Applies a fixed sequence of typographic clean-ups to a piece of Persian
  # text. Each step is guarded by a flag (see FLAGS); all flags default to
  # true and can be switched off individually through the options hash,
  # e.g. PersianEditor.new(text, :fix_english_numbers => false).
  class PersianEditor
    # Zero-width non-joiner (U+200C), built from its code point so the
    # invisible character never has to appear literally in the source.
    # NOTE(review): in the reviewed copy the ZWNJ characters inside the
    # patterns/replacements below were not visible; their positions are
    # reconstructed from the accompanying comments and the README --
    # confirm against the upstream source.
    ZWNJ = [0x200C].pack('U').freeze

    # Names of every supported option. The spellings (including
    # :fix_perfix_spacing and :aggresive) are part of the public interface.
    FLAGS = [
      :fix_dashes,
      :fix_three_dots,
      :fix_english_quotes,
      :fix_hamzeh,
      :cleanup_zwnj,
      :fix_spacing_for_braces_and_quotes,
      :fix_arabic_numbers,
      :fix_english_numbers,
      :fix_misc_non_persian_chars,
      :fix_perfix_spacing,
      :fix_suffix_spacing,
      :aggresive,
      :cleanup_kashidas,
      :cleanup_extra_marks,
      :cleanup_spacing,
      :cleanup_begin_and_end
    ].freeze

    PERSIAN_NUMBERS = "۱۲۳۴۵۶۷۸۹۰".freeze
    ARABIC_NUMBERS  = "١٢٣٤٥٦٧٨٩٠".freeze
    ENGLISH_NUMBERS = "1234567890".freeze
    BAD_CHARS       = ",;كي%".freeze
    GOOD_CHARS      = "،؛کی٪".freeze

    # text    - the String to clean up.
    # options - Hash of flag name (Symbol) => true/false. A flag that is
    #           absent (or nil) keeps its default of true.
    def initialize(text, options = {})
      @text = text
      options ||= {}
      FLAGS.each do |flag|
        setting = options[flag]
        # Only a missing value falls back to the default. The previous
        # `options[flag] || true` turned an explicit false back into true,
        # so no flag could ever be disabled.
        instance_variable_set("@#{flag}", setting.nil? ? true : setting)
      end
    end

    # Runs every enabled clean-up step, in order, and returns the result as
    # a new String. The text handed to the constructor is left untouched
    # (the steps use in-place gsub!/tr!/strip!, so they work on a copy;
    # this also keeps frozen input strings usable).
    def cleanup
      text = @text.dup

      # replace double dash to ndash and triple dash to mdash
      # (triple first, otherwise "---" would be consumed as "--" + "-")
      if @fix_dashes
        text.gsub!(/-{3}/, '—')
        text.gsub!(/-{2}/, '–')
      end

      # replace three (or more) dots, and any whitespace before them, with an ellipsis
      text.gsub!(/\s*\.{3,}/, '…') if @fix_three_dots

      # replace English quotes with their Persian equivalent
      text.gsub!(/(["'`]+)(.+)(\1)/, '«\2»') if @fix_english_quotes

      # convert "ه ی" (separated by whitespace and/or ZWNJ) to "هٔ"
      text.gsub!(/(\S)(ه[\s#{ZWNJ}]+ی)(\s)/, '\1هٔ\3') if @fix_hamzeh

      # remove unnecessary zwnj chars that are succeeded/preceded by a space
      text.gsub!(/\s+#{ZWNJ}|#{ZWNJ}\s+/, ' ') if @cleanup_zwnj

      # fix spacing for () [] {} “” «» (one space outside, no space inside)
      # NOTE(review): every pattern excludes ")" from the inner text, also
      # for the non-parenthesis pairs; kept as found.
      if @fix_spacing_for_braces_and_quotes
        [
          /\s*(\()\s*([^)]+?)\s*?(\))\s*/,
          /\s*(\[)\s*([^)]+?)\s*?(\])\s*/,
          /\s*(\{)\s*([^)]+?)\s*?(\})\s*/,
          /\s*(“)\s*([^)]+?)\s*?(”)\s*/,
          /\s*(«)\s*([^)]+?)\s*?(»)\s*/
        ].each { |pattern| text.gsub!(pattern, ' \1\2\3 ') }
      end

      # character replacement
      text.tr!(ENGLISH_NUMBERS, PERSIAN_NUMBERS) if @fix_english_numbers
      text.tr!(ARABIC_NUMBERS, PERSIAN_NUMBERS) if @fix_arabic_numbers
      text.tr!(BAD_CHARS, GOOD_CHARS) if @fix_misc_non_persian_chars

      # should not replace English numbers in English phrases: digits glued
      # to Latin letters, "-" or "_" are turned back into English digits
      text.gsub!(/([a-z\-_]+[۰-۹]+|[۰-۹]+[a-z\-_]+)/i) do |phrase|
        phrase.tr(PERSIAN_NUMBERS, ENGLISH_NUMBERS)
      end

      # put zwnj between word and prefix (mi* nemi*)
      # there's a possible bug here: می and نمی could be separate nouns and not prefix
      text.gsub!(/\s+(ن?می)\s+/, " \\1#{ZWNJ}") if @fix_perfix_spacing

      # put zwnj between word and suffix (*tar *tarin *ha *haye)
      # there's a possible bug here: های and تر could be separate nouns and not suffix
      text.gsub!(/\s+(تر(ین)?|ها(ی)?)\s+/, "#{ZWNJ}\\1 ") if @fix_suffix_spacing

      # -- Aggressive Editing ------------------------------------------
      if @aggresive

        # replace more than one ! or ? mark with just one
        if @cleanup_extra_marks
          text.gsub!(/(!){2,}/, '\1')
          text.gsub!(/(؟){2,}/, '\1')
        end

        # should remove all kashidas
        text.gsub!(/ـ+/, "") if @cleanup_kashidas

      end
      # ----------------------------------------------------------------

      # : ; , . ! ? and their Persian equivalents should have one space after and no space before
      # NOTE(review): this step is gated by the braces/quotes flag; no
      # dedicated punctuation flag exists in the option list.
      if @fix_spacing_for_braces_and_quotes
        text.gsub!(/\s*([:;,؛،.؟!]{1})\s*/, '\1 ')
      end

      # replace more than one space with a single one, and collapse runs of
      # line breaks (with surrounding whitespace) into " \n"
      if @cleanup_spacing
        text.gsub!(/[ ]+/, ' ')
        text.gsub!(/\s*[\n]+\s*/, " \n")
      end

      # remove spaces, tabs, and new lines from the beginning and end of the text
      text.strip! if @cleanup_begin_and_end

      text
    end

  end
end
|
120
|
+
|
121
|
+
# Mixin that exposes the editor as a method on strings.
module VirastarStingExtensions
  # Builds a Virastar::PersianEditor for this string with the given options
  # hash and returns whatever its #cleanup returns.
  def persian_cleanup(options = {})
    Virastar::PersianEditor.new(self, options).cleanup
  end
end

# Make persian_cleanup available on every String.
class String
  include VirastarStingExtensions
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,196 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
|
+
|
4
|
+
describe Virastar do

  # Zero-width non-joiner (U+200C), spelled out by code point so the
  # examples that depend on it stay readable and survive copy/paste.
  # NOTE(review): the ZWNJ positions in the inputs/expectations below are
  # reconstructed from the example descriptions -- confirm against upstream.
  zwnj = [0x200C].pack('U')

  it "should add persian_cleanup method to String class" do
    test = "test string"
    test.should respond_to(:persian_cleanup)
  end

  it "should replace Arabic kaf with its Persian equivalent" do
    test    = "ك"
    test2   = "كمك"
    result  = "ک"
    result2 = "کمک"
    test.persian_cleanup.should == result
    test2.persian_cleanup.should == result2
  end

  it "should replace Arabic Yeh with its Persian equivalent" do
    test    = "ي"
    test2   = "بيني"
    result  = "ی"
    result2 = "بینی"
    test.persian_cleanup.should == result
    test2.persian_cleanup.should == result2
  end

  it "should replace Arabic numbers with their Persian equivalent" do
    test   = "٠١٢٣٤٥٦٧٨٩"
    result = "۰۱۲۳۴۵۶۷۸۹"
    test.persian_cleanup.should == result
  end

  it "should replace English numbers with their Persian equivalent" do
    test   = "0123456789"
    result = "۰۱۲۳۴۵۶۷۸۹"
    test.persian_cleanup.should == result
  end

  it "should replace English comma and semicolon with their Persian equivalent" do
    test   = ";,"
    result = "؛ ،"
    test.persian_cleanup.should == result
  end

  it "should correct :;,.?! spacing (one space after and no space before)" do
    test   = "گفت : سلام"
    result = "گفت: سلام"
    #puts Diffy::Diff.new(test, result).to_s(:color) # TODO: char diff
    test.persian_cleanup.should == result
  end

  it "should replace English quotes with their Persian equivalent" do
    test  = "''تست''"
    test2 = "'تست'"
    test3 = "\"گفت: سلام\""
    test4 = "`تست`"
    test5 = "``تست``"
    result = result2 = result4 = result5 = "«تست»"
    result3 = "«گفت: سلام»"
    test.persian_cleanup.should == result
    test2.persian_cleanup.should == result2
    test3.persian_cleanup.should == result3
    test4.persian_cleanup.should == result4
    test5.persian_cleanup.should == result5
  end

  it "should replace three dots with ellipsis" do
    test    = "..."
    result  = "…"
    test2   = "...."
    result2 = "…"
    test3   = "خداحافظ ... به به"
    result3 = "خداحافظ… به به"
    test4   = "........."
    result4 = "…"
    test.persian_cleanup.should == result
    test2.persian_cleanup.should == result2
    test3.persian_cleanup.should == result3
    test4.persian_cleanup.should == result4
  end

  it "should convert ه ی to هٔ" do
    test  = "خانه ی ما"          # separated by a space
    test2 = "خانه#{zwnj}ی ما"    # separated by a zwnj
    result = result2 = "خانهٔ ما"
    test.persian_cleanup.should == result
    test2.persian_cleanup.should == result2
  end

  it "should replace double dash to ndash and triple dash to mdash" do
    test    = "--"
    test2   = "---"
    result  = "–"
    result2 = "—"
    test.persian_cleanup.should == result
    test2.persian_cleanup.should == result2
  end

  it "should replace more than one space with just a single one" do
    # the input must contain real runs of spaces, otherwise only the
    # leading/trailing strip is exercised
    test   = "  hello   world!  I'm virastar   "
    result = "hello world! I'm virastar"
    test.persian_cleanup.should == result
  end

  it "should remove unnecessary zwnj chars that are succeeded/preceded by a space" do
    test    = "سلام#{zwnj} دنیا" # zwnj before the space
    result  = "سلام دنیا"
    test2   = "سلام #{zwnj}دنیا" # zwnj after the space
    result2 = "سلام دنیا"
    test.persian_cleanup.should == result
    test2.persian_cleanup.should == result2
  end

  it "should fix spacing for () [] {} “” «» (one space outside, no space inside)" do
    [ ["(",")"],["[","]"],["{","}"],["“","”"],["«","»"] ].each do |b|
      test    = "this is#{b[0]} a test#{b[1]}"
      test2   = "this is #{b[0]} a test #{b[1]}"
      test3   = "this is #{b[0]} a test #{b[1]} yeah!"
      test4   = "this is #{b[0]}a test #{b[1]} yeah!"
      result  = "this is #{b[0]}a test#{b[1]}"
      result2 = "this is #{b[0]}a test#{b[1]}"
      result3 = "this is #{b[0]}a test#{b[1]} yeah!"
      result4 = "this is #{b[0]}a test#{b[1]} yeah!"
      test.persian_cleanup.should == result
      test2.persian_cleanup.should == result2
      test3.persian_cleanup.should == result3
      test4.persian_cleanup.should == result4
    end
  end

  it "should replace English percent sign to its Persian equivalent" do
    test   = "%"
    result = "٪"
    test.persian_cleanup.should == result
  end

  it "should replace more that one line breaks with just one" do
    test    = "this is \n \n \n \n a test"
    result  = "this is \na test"
    test2   = "this is\n\n\n\na test"
    result2 = "this is \na test"
    test3   = "this is \n\n\n\n a test"
    result3 = "this is \na test"

    test.persian_cleanup.should == result
    test2.persian_cleanup.should == result2
    test3.persian_cleanup.should == result3
  end

  it "should not replace line breaks" do
    test   = "this is \n a test"
    result = "this is \na test"
    test.persian_cleanup.should == result
  end

  it "should put zwnj between word and prefix/suffix (ha haye* tar* tarin mi* nemi*)" do
    test    = "ما می توانیم"
    result  = "ما می#{zwnj}توانیم"
    test2   = "ما نمی توانیم"
    result2 = "ما نمی#{zwnj}توانیم"
    test3   = "این بهترین کتاب ها است"
    result3 = "این بهترین کتاب#{zwnj}ها است"
    test4   = "بزرگ تر و قدرتمند ترین زبان های دنیا"
    result4 = "بزرگ#{zwnj}تر و قدرتمند#{zwnj}ترین زبان#{zwnj}های دنیا"
    # all four cases are asserted; previously only the first one was checked
    test.persian_cleanup.should == result
    test2.persian_cleanup.should == result2
    test3.persian_cleanup.should == result3
    test4.persian_cleanup.should == result4
  end

  it "should not replace English numbers in English phrases" do
    test   = "عزیز ATM74 در IBM-96 085 B 95BCS"
    result = "عزیز ATM74 در IBM-96 ۰۸۵ B 95BCS"
    test.persian_cleanup.should == result
  end


  context "aggressive editing" do
    it "should replace more than one ! or ? mark with just one" do
      test    = "salam!!!"
      result  = "salam!"
      test2   = "چطور؟؟؟"
      result2 = "چطور؟"
      test.persian_cleanup.should == result
      test2.persian_cleanup.should == result2
    end

    it "should remove all kashida" do
      test   = "سلامـــت"
      result = "سلامت"
      test.persian_cleanup.should == result
    end

    # pending: no implementation yet
    it "should correct wrong connections like in میشود or میدهد"
  end

end
|
data/virastar.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "virastar/version"
|
4
|
+
|
5
|
+
# Gem packaging metadata for virastar.
Gem::Specification.new do |s|
  s.name        = "virastar"
  s.version     = Virastar::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Allen A. Bargi"]
  s.email       = ["allen.bargi@gmail.com"]
  s.homepage    = "http://github.com/aziz/virastar"
  # user-facing text shown on the gem index; spelling corrected ("cleanning")
  s.summary     = %q{cleaning up Persian text!}
  s.description = %q{cleaning up Persian text!}
  # file lists are taken from git, so the gem must be built inside a checkout
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
  s.rubyforge_project = "virastar"
  s.extra_rdoc_files = [ "LICENSE", "README.md"]
  s.rdoc_options = ["--charset=UTF-8"]
  s.add_development_dependency(%q<rspec>, ["~> 2.1.0"])
  s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
end
|
metadata
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: virastar
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Allen A. Bargi
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-01-19 00:00:00 +01:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: rspec
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 11
|
30
|
+
segments:
|
31
|
+
- 2
|
32
|
+
- 1
|
33
|
+
- 0
|
34
|
+
version: 2.1.0
|
35
|
+
type: :development
|
36
|
+
version_requirements: *id001
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: bundler
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
hash: 23
|
46
|
+
segments:
|
47
|
+
- 1
|
48
|
+
- 0
|
49
|
+
- 0
|
50
|
+
version: 1.0.0
|
51
|
+
type: :development
|
52
|
+
version_requirements: *id002
|
53
|
+
description: cleanning up Persian text!
|
54
|
+
email:
|
55
|
+
- allen.bargi@gmail.com
|
56
|
+
executables: []
|
57
|
+
|
58
|
+
extensions: []
|
59
|
+
|
60
|
+
extra_rdoc_files:
|
61
|
+
- LICENSE
|
62
|
+
- README.md
|
63
|
+
files:
|
64
|
+
- .gitignore
|
65
|
+
- Gemfile
|
66
|
+
- Gemfile.lock
|
67
|
+
- LICENSE
|
68
|
+
- README.md
|
69
|
+
- Rakefile
|
70
|
+
- lib/virastar.rb
|
71
|
+
- lib/virastar/version.rb
|
72
|
+
- spec/spec_helper.rb
|
73
|
+
- spec/virastar_spec.rb
|
74
|
+
- virastar.gemspec
|
75
|
+
has_rdoc: true
|
76
|
+
homepage: http://github.com/aziz/virastar
|
77
|
+
licenses: []
|
78
|
+
|
79
|
+
post_install_message:
|
80
|
+
rdoc_options:
|
81
|
+
- --charset=UTF-8
|
82
|
+
require_paths:
|
83
|
+
- lib
|
84
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
85
|
+
none: false
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
hash: 3
|
90
|
+
segments:
|
91
|
+
- 0
|
92
|
+
version: "0"
|
93
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
hash: 3
|
99
|
+
segments:
|
100
|
+
- 0
|
101
|
+
version: "0"
|
102
|
+
requirements: []
|
103
|
+
|
104
|
+
rubyforge_project: virastar
|
105
|
+
rubygems_version: 1.4.2
|
106
|
+
signing_key:
|
107
|
+
specification_version: 3
|
108
|
+
summary: cleanning up Persian text!
|
109
|
+
test_files:
|
110
|
+
- spec/spec_helper.rb
|
111
|
+
- spec/virastar_spec.rb
|