virastar 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/TODO +4 -6
- data/lib/virastar.rb +14 -10
- data/lib/virastar/version.rb +1 -1
- data/spec/virastar_spec.rb +31 -7
- metadata +4 -4
data/TODO
CHANGED
@@ -1,12 +1,10 @@
|
|
1
|
-
-
|
2
|
-
- spacing after , : ; causes a lot of problems in this case (,) => (, )
|
1
|
+
- destroys \n if the line ends with ""
|
3
2
|
- do not destroy urls dots and colons
|
3
|
+
/https?://([-\w\.]+)+(:\d+)?(/([\w/_\.]*(\?\S+)?)?)?/
|
4
4
|
|
5
5
|
- translate to js
|
6
6
|
|
7
7
|
|
8
8
|
DONE:
|
9
|
-
-
|
10
|
-
-
|
11
|
-
- for suffixes also consider tari
|
12
|
-
- (IMP) do not replace \n probably caused by fixing spaces after dots and commas
|
9
|
+
- spacing after , : ; causes a lot of problems in this case (,) => (, )
|
10
|
+
- do not put space after : in the context of numbers like hour 19:45 => ۱۹:۴۵
|
data/lib/virastar.rb
CHANGED
@@ -47,15 +47,6 @@ module Virastar
|
|
47
47
|
# remove unnecessary zwnj chars that are followed/preceded by a space
|
48
48
|
text.gsub!(/\s+|\s+/,' ') if @cleanup_zwnj
|
49
49
|
|
50
|
-
# should fix spacing for () [] {} “” «»
|
51
|
-
if @fix_spacing_for_braces_and_quotes
|
52
|
-
text.gsub!(/\s*(\()\s*([^)]+?)\s*?(\))\s*/,' \1\2\3 ')
|
53
|
-
text.gsub!(/\s*(\[)\s*([^)]+?)\s*?(\])\s*/,' \1\2\3 ')
|
54
|
-
text.gsub!(/\s*(\{)\s*([^)]+?)\s*?(\})\s*/,' \1\2\3 ')
|
55
|
-
text.gsub!(/\s*(“)\s*([^)]+?)\s*?(”)\s*/,' \1\2\3 ')
|
56
|
-
text.gsub!(/\s*(«)\s*([^)]+?)\s*?(»)\s*/,' \1\2\3 ')
|
57
|
-
end
|
58
|
-
|
59
50
|
# character replacement
|
60
51
|
persian_numbers = "۱۲۳۴۵۶۷۸۹۰"
|
61
52
|
arabic_numbers = "١٢٣٤٥٦٧٨٩٠"
|
@@ -101,12 +92,25 @@ module Virastar
|
|
101
92
|
# : ; , . ! ? and their persian equivalents should have one space after and no space before
|
102
93
|
if @fix_spacing_for_braces_and_quotes
|
103
94
|
text.gsub!(/[ ]*([:;,؛،.؟!]{1})[ ]*/, '\1 ')
|
95
|
+
# do not put space after colon that separates time parts
|
96
|
+
text.gsub!(/([۰-۹]+):\s+([۰-۹]+)/, '\1:\2')
|
97
|
+
end
|
98
|
+
|
99
|
+
|
100
|
+
|
101
|
+
# should fix spacing for () [] {} “” «»
|
102
|
+
if @fix_spacing_for_braces_and_quotes
|
103
|
+
text.gsub!(/\s*(\()\s*([^)]+?)\s*?(\))\s*/,' \1\2\3 ')
|
104
|
+
text.gsub!(/\s*(\[)\s*([^)]+?)\s*?(\])\s*/,' \1\2\3 ')
|
105
|
+
text.gsub!(/\s*(\{)\s*([^)]+?)\s*?(\})\s*/,' \1\2\3 ')
|
106
|
+
text.gsub!(/\s*(“)\s*([^)]+?)\s*?(”)\s*/,' \1\2\3 ')
|
107
|
+
text.gsub!(/\s*(«)\s*([^)]+?)\s*?(»)\s*/,' \1\2\3 ')
|
104
108
|
end
|
105
109
|
|
106
110
|
# should replace more than one space with just a single one
|
107
111
|
if @cleanup_spacing
|
108
112
|
text.gsub!(/[ ]+/,' ')
|
109
|
-
|
113
|
+
text.gsub!(/([\n]+)[ ]*/,'\1')
|
110
114
|
end
|
111
115
|
|
112
116
|
# remove spaces, tabs, and new lines from the beginning and end of file
|
data/lib/virastar/version.rb
CHANGED
data/spec/virastar_spec.rb
CHANGED
@@ -145,19 +145,19 @@ describe Virastar do
|
|
145
145
|
end
|
146
146
|
|
147
147
|
it "should replace more that one line breaks with just one" do
|
148
|
-
test
|
149
|
-
result
|
150
|
-
test2
|
151
|
-
result2 = "this is
|
152
|
-
test3
|
153
|
-
result3 = "this is \na test"
|
148
|
+
test = "this is \n \n \n \n a test"
|
149
|
+
result = "this is \n\n\n\na test"
|
150
|
+
test2 = "this is\n\n\n\na test"
|
151
|
+
result2 = "this is\n\n\n\na test"
|
152
|
+
test3 = "this is \n\n\n a test"
|
153
|
+
result3 = "this is \n\n\na test"
|
154
154
|
|
155
155
|
test.persian_cleanup.should == result
|
156
156
|
test2.persian_cleanup.should == result2
|
157
157
|
test3.persian_cleanup.should == result3
|
158
158
|
end
|
159
159
|
|
160
|
-
it "should not replace line breaks" do
|
160
|
+
it "should not replace line breaks and should remove spaces after line break" do
|
161
161
|
test = "this is \n a test"
|
162
162
|
result = "this is \na test"
|
163
163
|
test.persian_cleanup.should == result
|
@@ -181,6 +181,30 @@ describe Virastar do
|
|
181
181
|
test.persian_cleanup.should == result
|
182
182
|
end
|
183
183
|
|
184
|
+
it "should not create spacing for something like (,)" do
|
185
|
+
test = "this is (,) comma"
|
186
|
+
result = "this is (،) comma"
|
187
|
+
test.persian_cleanup.should == result
|
188
|
+
end
|
189
|
+
|
190
|
+
it "should not puts space after time colon separator" do
|
191
|
+
test = "12:34"
|
192
|
+
result = "۱۲:۳۴"
|
193
|
+
test.persian_cleanup.should == result
|
194
|
+
end
|
195
|
+
|
196
|
+
it "should not destroy URLs"
|
197
|
+
# do
|
198
|
+
# test = "http://virastar.heroku.com"
|
199
|
+
# result = "http://virastar.heroku.com"
|
200
|
+
# test.persian_cleanup.should == result
|
201
|
+
#end
|
202
|
+
|
203
|
+
it "should not replace line breaks when the line ends with quotes" do
|
204
|
+
test = 'استفاده از "گيومه های فارسي"\nساده است'
|
205
|
+
result = 'استفاده از «گیومههای فارسی» \nساده است'
|
206
|
+
test.persian_cleanup.should == result
|
207
|
+
end
|
184
208
|
|
185
209
|
context "aggressive editing" do
|
186
210
|
it "should replace more than one ! or ? mark with just one" do
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: virastar
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 4
|
10
|
+
version: 0.0.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Allen A. Bargi
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-01-
|
18
|
+
date: 2011-01-22 00:00:00 +01:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|