virastar 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +25 -0
- data/LICENSE +21 -0
- data/README.md +82 -0
- data/Rakefile +2 -0
- data/lib/virastar/version.rb +3 -0
- data/lib/virastar.rb +128 -0
- data/spec/spec_helper.rb +10 -0
- data/spec/virastar_spec.rb +196 -0
- data/virastar.gemspec +23 -0
- metadata +111 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
virastar (0.0.1)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: http://rubygems.org/
|
8
|
+
specs:
|
9
|
+
diff-lcs (1.1.2)
|
10
|
+
rspec (2.1.0)
|
11
|
+
rspec-core (~> 2.1.0)
|
12
|
+
rspec-expectations (~> 2.1.0)
|
13
|
+
rspec-mocks (~> 2.1.0)
|
14
|
+
rspec-core (2.1.0)
|
15
|
+
rspec-expectations (2.1.0)
|
16
|
+
diff-lcs (~> 1.1.2)
|
17
|
+
rspec-mocks (2.1.0)
|
18
|
+
|
19
|
+
PLATFORMS
|
20
|
+
ruby
|
21
|
+
|
22
|
+
DEPENDENCIES
|
23
|
+
bundler (~> 1.0.0)
|
24
|
+
rspec (~> 2.1.0)
|
25
|
+
virastar!
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright (c) 2011 Allen A. Bargi <http://github.com/aziz>
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person ob-
|
4
|
+
taining a copy of this software and associated documentation
|
5
|
+
files (the "Software"), to deal in the Software without restric-
|
6
|
+
tion, including without limitation the rights to use, copy, modi-
|
7
|
+
fy, merge, publish, distribute, sublicense, and/or sell copies of
|
8
|
+
the Software, and to permit persons to whom the Software is fur-
|
9
|
+
nished to do so, subject to the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
16
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONIN-
|
17
|
+
FRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
19
|
+
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
-----
|
2
|
+
#ویراستار
|
3
|
+
نوشتههای فارسی شما را ویرایش میکند
|
4
|
+
|
5
|
+
-----
|
6
|
+
Virastar (in Persian:ویراستار)
|
7
|
+
|
8
|
+
|
9
|
+
## Specifications
|
10
|
+
|
11
|
+
###Virastar
|
12
|
+
* should add persian_cleanup method to String class
|
13
|
+
* should replace Arabic kaf with its Persian equivalent
|
14
|
+
* should replace Arabic Yeh with its Persian equivalent
|
15
|
+
* should replace Arabic numbers with their Persian equivalent
|
16
|
+
* should replace English numbers with their Persian equivalent
|
17
|
+
* should replace English comma and semicolon with their Persian equivalent
|
18
|
+
* should correct :;,.?! spacing (one space after and no space before)
|
19
|
+
* should replace English quotes with their Persian equivalent
|
20
|
+
* should replace three dots with ellipsis
|
21
|
+
* should convert ه ی to هٔ
|
22
|
+
* should replace double dash to ndash and triple dash to mdash
|
23
|
+
* should replace more than one space with just a single one
|
24
|
+
* should remove unnecessary zwnj chars that are succeeded/preceded by a space
|
25
|
+
* should fix spacing for () [] {} “” «» (one space outside, no space inside)
|
26
|
+
* should replace English percent sign to its Persian equivalent
|
27
|
+
* should replace more than one line break with just one
|
28
|
+
* should not replace line breaks
|
29
|
+
* should put zwnj between word and prefix/suffix (ha haye* tar* tarin mi* nemi*)
|
30
|
+
* should not replace English numbers in English phrases
|
31
|
+
|
32
|
+
#### aggressive editing
|
33
|
+
* should replace more than one ! or ? mark with just one
|
34
|
+
* should remove all kashidas
|
35
|
+
|
36
|
+
-----
|
37
|
+
## Install
|
38
|
+
gem install virastar
|
39
|
+
|
40
|
+
## Usage
|
41
|
+
"فارسي را كمی درست تر می نويسيم".persian_cleanup # => "فارسی را کمی درستتر مینویسیم"
|
42
|
+
|
43
|
+
Virastar comes with a list of flags to control its behavior. All flags are turned on by default, but you can
|
44
|
+
turn them off by passing an options hash to the `persian_cleanup` method
|
45
|
+
|
46
|
+
"سلام 123".persian_cleanup(:fix_english_numbers => false) # => "سلام 123"
|
47
|
+
|
48
|
+
here is the list of all flags:
|
49
|
+
|
50
|
+
* `fix_dashes`
|
51
|
+
* `fix_three_dots`
|
52
|
+
* `fix_english_quotes`
|
53
|
+
* `fix_hamzeh`
|
54
|
+
* `cleanup_zwnj`
|
55
|
+
* `fix_spacing_for_braces_and_quotes`
|
56
|
+
* `fix_arabic_numbers`
|
57
|
+
* `fix_english_numbers`
|
58
|
+
* `fix_misc_non_persian_chars`
|
59
|
+
* `fix_perfix_spacing`
|
60
|
+
* `fix_suffix_spacing`
|
61
|
+
* `aggresive`
|
62
|
+
* `cleanup_kashidas`
|
63
|
+
* `cleanup_extra_marks`
|
64
|
+
* `cleanup_spacing`
|
65
|
+
* `cleanup_begin_and_end`
|
66
|
+
|
67
|
+
## Acknowledgment
|
68
|
+
Virastar is highly inspired by [Virasbaz](http://virasbaz.persianlanguage.ir).
|
69
|
+
|
70
|
+
## Note on Patches/Pull Requests
|
71
|
+
|
72
|
+
* Fork the project.
|
73
|
+
* Make your feature addition or bug fix.
|
74
|
+
* Add tests for it. This is important so I don't break it in a
|
75
|
+
future version unintentionally.
|
76
|
+
* Commit, do not mess with rakefile, version, or history.
|
77
|
+
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
78
|
+
* Send me a pull request. Bonus points for topic branches.
|
79
|
+
|
80
|
+
## Copyright
|
81
|
+
|
82
|
+
Copyright (c) 2011 Allen A. Bargi. See LICENSE for details.
|
data/Rakefile
ADDED
data/lib/virastar.rb
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
if RUBY_VERSION.to_f < 1.9
|
3
|
+
require 'jcode'
|
4
|
+
$KCODE = 'u'
|
5
|
+
end
|
6
|
+
|
7
|
+
module Virastar

  # Cleans up Persian text: normalizes dashes, ellipses, quotes, digits,
  # Arabic look-alike letters, spacing around punctuation and brackets, and
  # joins common prefixes/suffixes to their word with a zero-width non-joiner.
  #
  # Every step is controlled by a boolean flag (see FLAGS). All flags default
  # to true; pass <tt>:flag_name => false</tt> in the options hash to disable
  # a step.
  class PersianEditor
    # Zero-width non-joiner (U+200C). Built from its code point so that the
    # otherwise invisible character is explicit wherever it is used below.
    ZWNJ = [0x200C].pack('U')

    # Option keys understood by #initialize. The spelling of
    # :fix_perfix_spacing and :aggresive is part of the public interface and
    # is kept as-is for backward compatibility.
    FLAGS = [
      :fix_dashes,
      :fix_three_dots,
      :fix_english_quotes,
      :fix_hamzeh,
      :cleanup_zwnj,
      :fix_spacing_for_braces_and_quotes,
      :fix_arabic_numbers,
      :fix_english_numbers,
      :fix_misc_non_persian_chars,
      :fix_perfix_spacing,
      :fix_suffix_spacing,
      :aggresive,
      :cleanup_kashidas,
      :cleanup_extra_marks,
      :cleanup_spacing,
      :cleanup_begin_and_end
    ].freeze

    # Opening/closing pairs that get one space outside and none inside.
    BRACKET_PAIRS = [
      ['(', ')'], ['[', ']'], ['{', '}'], ['“', '”'], ['«', '»']
    ].freeze

    # Character tables used with String#tr (position i maps to position i).
    PERSIAN_NUMBERS = "۱۲۳۴۵۶۷۸۹۰"
    ARABIC_NUMBERS  = "١٢٣٤٥٦٧٨٩٠"
    ENGLISH_NUMBERS = "1234567890"
    BAD_CHARS       = ",;كي%"
    GOOD_CHARS      = "،؛کی٪"

    # text    - the String to clean up (it is not modified).
    # options - Hash of flag => boolean. A missing or nil entry means "on".
    def initialize(text, options = {})
      @text = text
      FLAGS.each do |flag|
        value = options[flag]
        # `value || true` would turn an explicit false back into true, so the
        # default is applied only when the caller gave no value at all.
        instance_variable_set("@#{flag}", value.nil? ? true : value)
      end
    end

    # Applies every enabled step, in a fixed order, to a copy of the text.
    #
    # Returns the cleaned-up String.
    def cleanup
      # Work on a copy: all steps below mutate in place (gsub!/tr!/strip!),
      # and the caller's string must stay untouched.
      text = @text.dup

      # replace double dash to ndash and triple dash to mdash
      # (triple first, otherwise "---" would become ndash + "-")
      if @fix_dashes
        text.gsub!(/-{3}/, '—')
        text.gsub!(/-{2}/, '–')
      end

      # replace three (or more) dots with ellipsis, dropping spaces before it
      text.gsub!(/\s*\.{3,}/, '…') if @fix_three_dots

      # replace English quotes with their Persian equivalent
      text.gsub!(/(["'`]+)(.+)(\1)/, '«\2»') if @fix_english_quotes

      # convert "ه ی" to "هٔ"
      # NOTE(review): ZWNJ is accepted as a separator here in addition to
      # whitespace; confirm this matches the intended inputs.
      text.gsub!(/(\S)(ه[\s#{ZWNJ}]+ی)(\s)/, '\1هٔ\3') if @fix_hamzeh

      # remove unnecessary zwnj chars that are succeeded/preceded by a space
      text.gsub!(/\s+#{ZWNJ}|#{ZWNJ}\s+/, ' ') if @cleanup_zwnj

      # fix spacing for () [] {} “” «»: one space outside, none inside.
      # The inner class excludes the pair's own closer so the lazy match
      # stops at the first closing character of the right kind.
      if @fix_spacing_for_braces_and_quotes
        BRACKET_PAIRS.each do |opener, closer|
          o = Regexp.escape(opener)
          c = Regexp.escape(closer)
          text.gsub!(/\s*(#{o})\s*([^#{c}]+?)\s*?(#{c})\s*/, ' \1\2\3 ')
        end
      end

      # character replacement
      text.tr!(ENGLISH_NUMBERS, PERSIAN_NUMBERS) if @fix_english_numbers
      text.tr!(ARABIC_NUMBERS, PERSIAN_NUMBERS)  if @fix_arabic_numbers
      text.tr!(BAD_CHARS, GOOD_CHARS)            if @fix_misc_non_persian_chars

      # digits glued to Latin letters (ATM74, 95BCS, IBM-96) belong to an
      # English phrase, so they are turned back into English digits
      text.gsub!(/([a-z\-_]+[۰-۹]+|[۰-۹]+[a-z\-_]+)/i) do |phrase|
        phrase.tr(PERSIAN_NUMBERS, ENGLISH_NUMBERS)
      end

      # put zwnj between word and prefix (mi* nemi*)
      # there's a possible bug here: می and نمی could be separate nouns and not prefix
      if @fix_perfix_spacing
        text.gsub!(/\s+(ن?می)\s+/, " \\1#{ZWNJ}")
      end

      # put zwnj between word and suffix (*tar *tarin *ha *haye)
      # there's a possible bug here: های and تر could be separate nouns and not suffix
      if @fix_suffix_spacing
        text.gsub!(/\s+(تر(ین)?|ها(ی)?)\s+/, "#{ZWNJ}\\1 ")
      end

      # -- Aggressive Editing ------------------------------------------
      if @aggresive
        # replace more than one ! or ؟ mark with just one
        if @cleanup_extra_marks
          text.gsub!(/(!){2,}/, '\1')
          text.gsub!(/(؟){2,}/, '\1')
        end

        # remove all kashidas
        text.gsub!(/ـ+/, "") if @cleanup_kashidas
      end
      # ----------------------------------------------------------------

      # : ; , . ! ? and their Persian equivalents should have one space
      # after and no space before (shares the braces/quotes spacing flag)
      if @fix_spacing_for_braces_and_quotes
        text.gsub!(/\s*([:;,؛،.؟!]{1})\s*/, '\1 ')
      end

      # replace more than one space with a single one, and collapse runs of
      # line breaks (with surrounding whitespace) into " \n"
      if @cleanup_spacing
        text.gsub!(/[ ]+/, ' ')
        text.gsub!(/\s*[\n]+\s*/, " \n")
      end

      # remove spaces, tabs, and new lines from the beginning and end of text
      text.strip! if @cleanup_begin_and_end

      text
    end

  end
end
|
120
|
+
|
121
|
+
# Mixin that exposes the Persian editor directly on strings.
module VirastarStingExtensions
  # Builds a Virastar::PersianEditor for the receiver and runs it.
  #
  # options - Hash of editor flags, handed to PersianEditor.new unchanged.
  #
  # Returns the result of PersianEditor#cleanup.
  def persian_cleanup(options = {})
    Virastar::PersianEditor.new(self, options).cleanup
  end
end

# Make #persian_cleanup available on every String.
class String
  include VirastarStingExtensions
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,196 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
|
+
|
4
|
+
describe Virastar do
|
5
|
+
|
6
|
+
it "should add persian_cleanup method to String class" do
|
7
|
+
test = "test string"
|
8
|
+
test.should respond_to(:persian_cleanup)
|
9
|
+
end
|
10
|
+
|
11
|
+
it "should replace Arabic kaf with its Persian equivalent" do
|
12
|
+
test = "ك"
|
13
|
+
test2 = "كمك"
|
14
|
+
result = "ک"
|
15
|
+
result2 = "کمک"
|
16
|
+
test.persian_cleanup.should == result
|
17
|
+
test2.persian_cleanup.should == result2
|
18
|
+
end
|
19
|
+
|
20
|
+
it "should replace Arabic Yeh with its Persian equivalent" do
|
21
|
+
test = "ي"
|
22
|
+
test2 = "بيني"
|
23
|
+
result = "ی"
|
24
|
+
result2 = "بینی"
|
25
|
+
test.persian_cleanup.should == result
|
26
|
+
test2.persian_cleanup.should == result2
|
27
|
+
end
|
28
|
+
|
29
|
+
it "should replace Arabic numbers with their Persian equivalent" do
|
30
|
+
test = "٠١٢٣٤٥٦٧٨٩"
|
31
|
+
result = "۰۱۲۳۴۵۶۷۸۹"
|
32
|
+
test.persian_cleanup.should == result
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should replace English numbers with their Persian equivalent" do
|
36
|
+
test = "0123456789"
|
37
|
+
result = "۰۱۲۳۴۵۶۷۸۹"
|
38
|
+
test.persian_cleanup.should == result
|
39
|
+
end
|
40
|
+
|
41
|
+
it "should replace English comma and semicolon with their Persian equivalent" do
|
42
|
+
test = ";,"
|
43
|
+
result = "؛ ،"
|
44
|
+
test.persian_cleanup.should == result
|
45
|
+
end
|
46
|
+
|
47
|
+
it "should correct :;,.?! spacing (one space after and no space before)" do
|
48
|
+
test = "گفت : سلام"
|
49
|
+
result = "گفت: سلام"
|
50
|
+
#puts Diffy::Diff.new(test, result).to_s(:color) # TODO: char diff
|
51
|
+
test.persian_cleanup.should == result
|
52
|
+
end
|
53
|
+
|
54
|
+
it "should replace English quotes with their Persian equivalent" do
|
55
|
+
test = "''تست''"
|
56
|
+
test2 = "'تست'"
|
57
|
+
test3 = "\"گفت: سلام\""
|
58
|
+
test4 = "`تست`"
|
59
|
+
test5 = "``تست``"
|
60
|
+
result = result2 = result4 = result5 = "«تست»"
|
61
|
+
result3 = "«گفت: سلام»"
|
62
|
+
test.persian_cleanup.should == result
|
63
|
+
test2.persian_cleanup.should == result2
|
64
|
+
test3.persian_cleanup.should == result3
|
65
|
+
test4.persian_cleanup.should == result4
|
66
|
+
test5.persian_cleanup.should == result5
|
67
|
+
end
|
68
|
+
|
69
|
+
it "should replace three dots with ellipsis" do
|
70
|
+
test = "..."
|
71
|
+
result = "…"
|
72
|
+
test2 = "...."
|
73
|
+
result2 = "…"
|
74
|
+
test3 = "خداحافظ ... به به"
|
75
|
+
result3 = "خداحافظ… به به"
|
76
|
+
test4 = "........."
|
77
|
+
result4 = "…"
|
78
|
+
test.persian_cleanup.should == result
|
79
|
+
test2.persian_cleanup.should == result2
|
80
|
+
test3.persian_cleanup.should == result3
|
81
|
+
test4.persian_cleanup.should == result4
|
82
|
+
end
|
83
|
+
|
84
|
+
it "should convert ه ی to هٔ" do
|
85
|
+
test = "خانه ی ما"
|
86
|
+
test2 = "خانه ی ما"
|
87
|
+
result = result2 = "خانهٔ ما"
|
88
|
+
test.persian_cleanup.should == result
|
89
|
+
test2.persian_cleanup.should == result2
|
90
|
+
end
|
91
|
+
|
92
|
+
it "should replace double dash to ndash and triple dash to mdash" do
|
93
|
+
test = "--"
|
94
|
+
test2 = "---"
|
95
|
+
result = "–"
|
96
|
+
result2 = "—"
|
97
|
+
test.persian_cleanup.should == result
|
98
|
+
test2.persian_cleanup.should == result2
|
99
|
+
end
|
100
|
+
|
101
|
+
it "should replace more than one space with just a single one" do
|
102
|
+
test = " hello world! I'm virastar "
|
103
|
+
result = "hello world! I'm virastar"
|
104
|
+
test.persian_cleanup.should == result
|
105
|
+
end
|
106
|
+
|
107
|
+
it "should remove unnecessary zwnj chars that are succeeded/preceded by a space" do
|
108
|
+
test = "سلام دنیا" # before
|
109
|
+
result = "سلام دنیا"
|
110
|
+
test2 = "سلام دنیا" #after
|
111
|
+
result2 = "سلام دنیا"
|
112
|
+
test.persian_cleanup.should == result
|
113
|
+
test2.persian_cleanup.should == result2
|
114
|
+
end
|
115
|
+
|
116
|
+
it "should fix spacing for () [] {} “” «» (one space outside, no space inside)" do
|
117
|
+
[ ["(",")"],["[","]"],["{","}"],["“","”"],["«","»"] ].each do |b|
|
118
|
+
test = "this is#{b[0]} a test#{b[1]}"
|
119
|
+
test2 = "this is #{b[0]} a test #{b[1]}"
|
120
|
+
test3 = "this is #{b[0]} a test #{b[1]} yeah!"
|
121
|
+
test4 = "this is #{b[0]}a test #{b[1]} yeah!"
|
122
|
+
result = "this is #{b[0]}a test#{b[1]}"
|
123
|
+
result2 = "this is #{b[0]}a test#{b[1]}"
|
124
|
+
result3 = "this is #{b[0]}a test#{b[1]} yeah!"
|
125
|
+
result4 = "this is #{b[0]}a test#{b[1]} yeah!"
|
126
|
+
test.persian_cleanup.should == result
|
127
|
+
test2.persian_cleanup.should == result2
|
128
|
+
test3.persian_cleanup.should == result3
|
129
|
+
test4.persian_cleanup.should == result4
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
it "should replace English percent sign to its Persian equivalent" do
|
134
|
+
test = "%"
|
135
|
+
result = "٪"
|
136
|
+
test.persian_cleanup.should == result
|
137
|
+
end
|
138
|
+
|
139
|
+
it "should replace more that one line breaks with just one" do
|
140
|
+
test = "this is \n \n \n \n a test"
|
141
|
+
result = "this is \na test"
|
142
|
+
test2 = "this is\n\n\n\na test"
|
143
|
+
result2 = "this is \na test"
|
144
|
+
test3 = "this is \n\n\n\n a test"
|
145
|
+
result3 = "this is \na test"
|
146
|
+
|
147
|
+
test.persian_cleanup.should == result
|
148
|
+
test2.persian_cleanup.should == result2
|
149
|
+
test3.persian_cleanup.should == result3
|
150
|
+
end
|
151
|
+
|
152
|
+
it "should not replace line breaks" do
|
153
|
+
test = "this is \n a test"
|
154
|
+
result = "this is \na test"
|
155
|
+
test.persian_cleanup.should == result
|
156
|
+
end
|
157
|
+
|
158
|
+
it "should put zwnj between word and prefix/suffix (ha haye* tar* tarin mi* nemi*)" do
  # The expected strings contain zero-width non-joiners (U+200C). They are
  # built from the code point so the invisible character is explicit.
  zwnj = [0x200C].pack('U')

  # [input, expected] pairs; every pair is asserted, not just the first one.
  cases = [
    ["ما می توانیم",                         "ما می#{zwnj}توانیم"],
    ["ما نمی توانیم",                        "ما نمی#{zwnj}توانیم"],
    ["این بهترین کتاب ها است",               "این بهترین کتاب#{zwnj}ها است"],
    ["بزرگ تر و قدرتمند ترین زبان های دنیا", "بزرگ#{zwnj}تر و قدرتمند#{zwnj}ترین زبان#{zwnj}های دنیا"]
  ]

  cases.each do |input, expected|
    input.persian_cleanup.should == expected
  end
end
|
169
|
+
|
170
|
+
it "should not replace English numbers in English phrases" do
|
171
|
+
test = "عزیز ATM74 در IBM-96 085 B 95BCS"
|
172
|
+
result = "عزیز ATM74 در IBM-96 ۰۸۵ B 95BCS"
|
173
|
+
test.persian_cleanup.should == result
|
174
|
+
end
|
175
|
+
|
176
|
+
|
177
|
+
context "aggressive editing" do
|
178
|
+
it "should replace more than one ! or ? mark with just one" do
|
179
|
+
test = "salam!!!"
|
180
|
+
result = "salam!"
|
181
|
+
test2 = "چطور؟؟؟"
|
182
|
+
result2 = "چطور؟"
|
183
|
+
test.persian_cleanup.should == result
|
184
|
+
test2.persian_cleanup.should == result2
|
185
|
+
end
|
186
|
+
|
187
|
+
it "should remove all kashida" do
|
188
|
+
test = "سلامـــت"
|
189
|
+
result = "سلامت"
|
190
|
+
test.persian_cleanup.should == result
|
191
|
+
end
|
192
|
+
|
193
|
+
it "should correct wrong connections like in میشود or میدهد"
|
194
|
+
end
|
195
|
+
|
196
|
+
end
|
data/virastar.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "virastar/version"
|
4
|
+
|
5
|
+
# Gem packaging metadata for virastar.
Gem::Specification.new do |s|
  s.name        = "virastar"
  s.version     = Virastar::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Allen A. Bargi"]
  s.email       = ["allen.bargi@gmail.com"]
  s.homepage    = "http://github.com/aziz/virastar"
  s.summary     = %q{cleaning up Persian text!}
  s.description = %q{cleaning up Persian text!}
  # The bundled LICENSE file carries the MIT license text.
  s.licenses    = ["MIT"]

  # File lists come from git, so the gem must be built from a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.rubyforge_project = "virastar"
  s.extra_rdoc_files  = [ "LICENSE", "README.md"]
  s.rdoc_options      = ["--charset=UTF-8"]

  s.add_development_dependency(%q<rspec>, ["~> 2.1.0"])
  s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
end
|
metadata
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: virastar
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Allen A. Bargi
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-01-19 00:00:00 +01:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: rspec
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 11
|
30
|
+
segments:
|
31
|
+
- 2
|
32
|
+
- 1
|
33
|
+
- 0
|
34
|
+
version: 2.1.0
|
35
|
+
type: :development
|
36
|
+
version_requirements: *id001
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: bundler
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
hash: 23
|
46
|
+
segments:
|
47
|
+
- 1
|
48
|
+
- 0
|
49
|
+
- 0
|
50
|
+
version: 1.0.0
|
51
|
+
type: :development
|
52
|
+
version_requirements: *id002
|
53
|
+
description: cleanning up Persian text!
|
54
|
+
email:
|
55
|
+
- allen.bargi@gmail.com
|
56
|
+
executables: []
|
57
|
+
|
58
|
+
extensions: []
|
59
|
+
|
60
|
+
extra_rdoc_files:
|
61
|
+
- LICENSE
|
62
|
+
- README.md
|
63
|
+
files:
|
64
|
+
- .gitignore
|
65
|
+
- Gemfile
|
66
|
+
- Gemfile.lock
|
67
|
+
- LICENSE
|
68
|
+
- README.md
|
69
|
+
- Rakefile
|
70
|
+
- lib/virastar.rb
|
71
|
+
- lib/virastar/version.rb
|
72
|
+
- spec/spec_helper.rb
|
73
|
+
- spec/virastar_spec.rb
|
74
|
+
- virastar.gemspec
|
75
|
+
has_rdoc: true
|
76
|
+
homepage: http://github.com/aziz/virastar
|
77
|
+
licenses: []
|
78
|
+
|
79
|
+
post_install_message:
|
80
|
+
rdoc_options:
|
81
|
+
- --charset=UTF-8
|
82
|
+
require_paths:
|
83
|
+
- lib
|
84
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
85
|
+
none: false
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
hash: 3
|
90
|
+
segments:
|
91
|
+
- 0
|
92
|
+
version: "0"
|
93
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
hash: 3
|
99
|
+
segments:
|
100
|
+
- 0
|
101
|
+
version: "0"
|
102
|
+
requirements: []
|
103
|
+
|
104
|
+
rubyforge_project: virastar
|
105
|
+
rubygems_version: 1.4.2
|
106
|
+
signing_key:
|
107
|
+
specification_version: 3
|
108
|
+
summary: cleanning up Persian text!
|
109
|
+
test_files:
|
110
|
+
- spec/spec_helper.rb
|
111
|
+
- spec/virastar_spec.rb
|