virastar 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/TODO +4 -6
- data/lib/virastar.rb +14 -10
- data/lib/virastar/version.rb +1 -1
- data/spec/virastar_spec.rb +31 -7
- metadata +4 -4
data/TODO
CHANGED
@@ -1,12 +1,10 @@
|
|
1
|
-
-
|
2
|
-
- spacing after , : ; causing a lot of problem in this case (,) => (, )
|
1
|
+
- destroys \n if the line ends with ""
|
3
2
|
- do not destroy urls dots and colons
|
3
|
+
/https?://([-\w\.]+)+(:\d+)?(/([\w/_\.]*(\?\S+)?)?)?/
|
4
4
|
|
5
5
|
- translate to js
|
6
6
|
|
7
7
|
|
8
8
|
DONE:
|
9
|
-
-
|
10
|
-
-
|
11
|
-
- for suffixes also consider tari
|
12
|
-
- (IMP) do not replace \n probably caused by fixing spaces after dots and commas
|
9
|
+
- spacing after , : ; causing a lot of problem in this case (,) => (, )
|
10
|
+
- do not put space after : in the context of numbers like hour 19:45 => ۱۹:۴۵
|
data/lib/virastar.rb
CHANGED
@@ -47,15 +47,6 @@ module Virastar
|
|
47
47
|
# remove unnecessary zwnj chars that are succeeded/preceded by a space
|
48
48
|
text.gsub!(/\s+|\s+/,' ') if @cleanup_zwnj
|
49
49
|
|
50
|
-
# should fix spacing for () [] {} “” «»
|
51
|
-
if @fix_spacing_for_braces_and_quotes
|
52
|
-
text.gsub!(/\s*(\()\s*([^)]+?)\s*?(\))\s*/,' \1\2\3 ')
|
53
|
-
text.gsub!(/\s*(\[)\s*([^)]+?)\s*?(\])\s*/,' \1\2\3 ')
|
54
|
-
text.gsub!(/\s*(\{)\s*([^)]+?)\s*?(\})\s*/,' \1\2\3 ')
|
55
|
-
text.gsub!(/\s*(“)\s*([^)]+?)\s*?(”)\s*/,' \1\2\3 ')
|
56
|
-
text.gsub!(/\s*(«)\s*([^)]+?)\s*?(»)\s*/,' \1\2\3 ')
|
57
|
-
end
|
58
|
-
|
59
50
|
# character replacement
|
60
51
|
persian_numbers = "۱۲۳۴۵۶۷۸۹۰"
|
61
52
|
arabic_numbers = "١٢٣٤٥٦٧٨٩٠"
|
@@ -101,12 +92,25 @@ module Virastar
|
|
101
92
|
# : ; , . ! ? and their persian equivalents should have one space after and no space before
|
102
93
|
if @fix_spacing_for_braces_and_quotes
|
103
94
|
text.gsub!(/[ ]*([:;,؛،.؟!]{1})[ ]*/, '\1 ')
|
95
|
+
# do not put space after colon that separates time parts
|
96
|
+
text.gsub!(/([۰-۹]+):\s+([۰-۹]+)/, '\1:\2')
|
97
|
+
end
|
98
|
+
|
99
|
+
|
100
|
+
|
101
|
+
# should fix spacing for () [] {} “” «»
|
102
|
+
if @fix_spacing_for_braces_and_quotes
|
103
|
+
text.gsub!(/\s*(\()\s*([^)]+?)\s*?(\))\s*/,' \1\2\3 ')
|
104
|
+
text.gsub!(/\s*(\[)\s*([^)]+?)\s*?(\])\s*/,' \1\2\3 ')
|
105
|
+
text.gsub!(/\s*(\{)\s*([^)]+?)\s*?(\})\s*/,' \1\2\3 ')
|
106
|
+
text.gsub!(/\s*(“)\s*([^)]+?)\s*?(”)\s*/,' \1\2\3 ')
|
107
|
+
text.gsub!(/\s*(«)\s*([^)]+?)\s*?(»)\s*/,' \1\2\3 ')
|
104
108
|
end
|
105
109
|
|
106
110
|
# should replace more than one space with just a single one
|
107
111
|
if @cleanup_spacing
|
108
112
|
text.gsub!(/[ ]+/,' ')
|
109
|
-
|
113
|
+
text.gsub!(/([\n]+)[ ]*/,'\1')
|
110
114
|
end
|
111
115
|
|
112
116
|
# remove spaces, tabs, and new lines from the beginning and end of file
|
data/lib/virastar/version.rb
CHANGED
data/spec/virastar_spec.rb
CHANGED
@@ -145,19 +145,19 @@ describe Virastar do
|
|
145
145
|
end
|
146
146
|
|
147
147
|
it "should replace more that one line breaks with just one" do
|
148
|
-
test
|
149
|
-
result
|
150
|
-
test2
|
151
|
-
result2 = "this is
|
152
|
-
test3
|
153
|
-
result3 = "this is \na test"
|
148
|
+
test = "this is \n \n \n \n a test"
|
149
|
+
result = "this is \n\n\n\na test"
|
150
|
+
test2 = "this is\n\n\n\na test"
|
151
|
+
result2 = "this is\n\n\n\na test"
|
152
|
+
test3 = "this is \n\n\n a test"
|
153
|
+
result3 = "this is \n\n\na test"
|
154
154
|
|
155
155
|
test.persian_cleanup.should == result
|
156
156
|
test2.persian_cleanup.should == result2
|
157
157
|
test3.persian_cleanup.should == result3
|
158
158
|
end
|
159
159
|
|
160
|
-
it "should not replace line breaks" do
|
160
|
+
it "should not replace line breaks and should remove spaces after line break" do
|
161
161
|
test = "this is \n a test"
|
162
162
|
result = "this is \na test"
|
163
163
|
test.persian_cleanup.should == result
|
@@ -181,6 +181,30 @@ describe Virastar do
|
|
181
181
|
test.persian_cleanup.should == result
|
182
182
|
end
|
183
183
|
|
184
|
+
it "should not create spacing for something like (,)" do
|
185
|
+
test = "this is (,) comma"
|
186
|
+
result = "this is (،) comma"
|
187
|
+
test.persian_cleanup.should == result
|
188
|
+
end
|
189
|
+
|
190
|
+
it "should not puts space after time colon separator" do
|
191
|
+
test = "12:34"
|
192
|
+
result = "۱۲:۳۴"
|
193
|
+
test.persian_cleanup.should == result
|
194
|
+
end
|
195
|
+
|
196
|
+
it "should not destroy URLs"
|
197
|
+
# do
|
198
|
+
# test = "http://virastar.heroku.com"
|
199
|
+
# result = "http://virastar.heroku.com"
|
200
|
+
# test.persian_cleanup.should == result
|
201
|
+
#end
|
202
|
+
|
203
|
+
it "should not replace line breaks when the line ends with quotes" do
|
204
|
+
test = 'استفاده از "گيومه های فارسي"\nساده است'
|
205
|
+
result = 'استفاده از «گیومههای فارسی» \nساده است'
|
206
|
+
test.persian_cleanup.should == result
|
207
|
+
end
|
184
208
|
|
185
209
|
context "aggressive editing" do
|
186
210
|
it "should replace more than one ! or ? mark with just one" do
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: virastar
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 4
|
10
|
+
version: 0.0.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Allen A. Bargi
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-01-
|
18
|
+
date: 2011-01-22 00:00:00 +01:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|