virastar 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/TODO CHANGED
@@ -1,12 +1,10 @@
1
- - do not put space after : in the context of numbers like hour 19:45 => ۱۹:۴۵
2
- - spacing after , : ; causing a lot of problem in this case (,) => (, )
1
+ - destroys \n if the line ends with ""
3
2
  - do not destroy urls dots and colons
3
+ /https?://([-\w\.]+)+(:\d+)?(/([\w/_\.]*(\?\S+)?)?)?/
4
4
 
5
5
  - translate to js
6
6
 
7
7
 
8
8
  DONE:
9
- - he yeh => hamzeh should consider arabic yeh and zwnj chars
10
- - replacing quotes shouldn't be greedy
11
- - for suffixes also consider tari
12
- - (IMP) do not replace \n probably caused by fixing spaces after dots and commas
9
+ - spacing after , : ; causing a lot of problem in this case (,) => (, )
10
+ - do not put space after : in the context of numbers like hour 19:45 => ۱۹:۴۵
@@ -47,15 +47,6 @@ module Virastar
47
47
  # remove unnecessary zwnj chars that are succeeded/preceded by a space
48
48
  text.gsub!(/\s+‌|‌\s+/,' ') if @cleanup_zwnj
49
49
 
50
- # should fix spacing for () [] {} “” «»
51
- if @fix_spacing_for_braces_and_quotes
52
- text.gsub!(/\s*(\()\s*([^)]+?)\s*?(\))\s*/,' \1\2\3 ')
53
- text.gsub!(/\s*(\[)\s*([^)]+?)\s*?(\])\s*/,' \1\2\3 ')
54
- text.gsub!(/\s*(\{)\s*([^)]+?)\s*?(\})\s*/,' \1\2\3 ')
55
- text.gsub!(/\s*(“)\s*([^)]+?)\s*?(”)\s*/,' \1\2\3 ')
56
- text.gsub!(/\s*(«)\s*([^)]+?)\s*?(»)\s*/,' \1\2\3 ')
57
- end
58
-
59
50
  # character replacement
60
51
  persian_numbers = "۱۲۳۴۵۶۷۸۹۰"
61
52
  arabic_numbers = "١٢٣٤٥٦٧٨٩٠"
@@ -101,12 +92,25 @@ module Virastar
101
92
  # : ; , . ! ? and their persian equivalents should have one space after and no space before
102
93
  if @fix_spacing_for_braces_and_quotes
103
94
  text.gsub!(/[ ‌ ]*([:;,؛،.؟!]{1})[ ‌ ]*/, '\1 ')
95
+ # do not put space after colon that separates time parts
96
+ text.gsub!(/([۰-۹]+):\s+([۰-۹]+)/, '\1:\2')
97
+ end
98
+
99
+
100
+
101
+ # should fix spacing for () [] {} “” «»
102
+ if @fix_spacing_for_braces_and_quotes
103
+ text.gsub!(/\s*(\()\s*([^)]+?)\s*?(\))\s*/,' \1\2\3 ')
104
+ text.gsub!(/\s*(\[)\s*([^)]+?)\s*?(\])\s*/,' \1\2\3 ')
105
+ text.gsub!(/\s*(\{)\s*([^)]+?)\s*?(\})\s*/,' \1\2\3 ')
106
+ text.gsub!(/\s*(“)\s*([^)]+?)\s*?(”)\s*/,' \1\2\3 ')
107
+ text.gsub!(/\s*(«)\s*([^)]+?)\s*?(»)\s*/,' \1\2\3 ')
104
108
  end
105
109
 
106
110
  # should replace more than one space with just a single one
107
111
  if @cleanup_spacing
108
112
  text.gsub!(/[ ]+/,' ')
109
- #text.gsub!(/\s*[\n]+\s*/," \n")
113
+ text.gsub!(/([\n]+)[ ‌]*/,'\1')
110
114
  end
111
115
 
112
116
  # remove spaces, tabs, and new lines from the beginning and end of file
@@ -1,3 +1,3 @@
1
1
  module Virastar
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
@@ -145,19 +145,19 @@ describe Virastar do
145
145
  end
146
146
 
147
147
  it "should replace more that one line breaks with just one" do
148
- test = "this is \n \n \n \n a test"
149
- result = "this is \na test"
150
- test2 = "this is\n\n\n\na test"
151
- result2 = "this is \na test"
152
- test3 = "this is \n\n\n\n a test"
153
- result3 = "this is \na test"
148
+ test = "this is \n \n \n \n a test"
149
+ result = "this is \n\n\n\na test"
150
+ test2 = "this is\n\n\n\na test"
151
+ result2 = "this is\n\n\n\na test"
152
+ test3 = "this is \n\n\n a test"
153
+ result3 = "this is \n\n\na test"
154
154
 
155
155
  test.persian_cleanup.should == result
156
156
  test2.persian_cleanup.should == result2
157
157
  test3.persian_cleanup.should == result3
158
158
  end
159
159
 
160
- it "should not replace line breaks" do
160
+ it "should not replace line breaks and should remove spaces after line break" do
161
161
  test = "this is \n a test"
162
162
  result = "this is \na test"
163
163
  test.persian_cleanup.should == result
@@ -181,6 +181,30 @@ describe Virastar do
181
181
  test.persian_cleanup.should == result
182
182
  end
183
183
 
184
+ it "should not create spacing for something like (,)" do
185
+ test = "this is (,) comma"
186
+ result = "this is (،) comma"
187
+ test.persian_cleanup.should == result
188
+ end
189
+
190
+ it "should not puts space after time colon separator" do
191
+ test = "12:34"
192
+ result = "۱۲:۳۴"
193
+ test.persian_cleanup.should == result
194
+ end
195
+
196
+ it "should not destroy URLs"
197
+ # do
198
+ # test = "http://virastar.heroku.com"
199
+ # result = "http://virastar.heroku.com"
200
+ # test.persian_cleanup.should == result
201
+ #end
202
+
203
+ it "should not replace line breaks when the line ends with quotes" do
204
+ test = 'استفاده از "گيومه های فارسي"\nساده است'
205
+ result = 'استفاده از «گیومه‌های فارسی» \nساده است'
206
+ test.persian_cleanup.should == result
207
+ end
184
208
 
185
209
  context "aggressive editing" do
186
210
  it "should replace more than one ! or ? mark with just one" do
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: virastar
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 23
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 3
10
- version: 0.0.3
9
+ - 4
10
+ version: 0.0.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - Allen A. Bargi
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-01-20 00:00:00 +01:00
18
+ date: 2011-01-22 00:00:00 +01:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency