virastar 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/TODO CHANGED
@@ -1,12 +1,10 @@
1
- - do not put space after : in the context of numbers like hour 19:45 => ۱۹:۴۵
2
- - spacing after , : ; causing a lot of problem in this case (,) => (, )
1
+ - destroys \n if the line ends with ""
3
2
  - do not destroy urls dots and colons
3
+ /https?://([-\w\.]+)+(:\d+)?(/([\w/_\.]*(\?\S+)?)?)?/
4
4
 
5
5
  - translate to js
6
6
 
7
7
 
8
8
  DONE:
9
- - he yeh => hamzeh should consider arabic yeh and zwnj chars
10
- - replacing quotes shouldn't be greedy
11
- - for suffixes also consider tari
12
- - (IMP) do not replace \n probably caused by fixing spaces after dots and commas
9
+ - spacing after , : ; causing a lot of problem in this case (,) => (, )
10
+ - do not put space after : in the context of numbers like hour 19:45 => ۱۹:۴۵
@@ -47,15 +47,6 @@ module Virastar
47
47
  # remove unnecessary zwnj chars that are followed or preceded by a space
48
48
  text.gsub!(/\s+‌|‌\s+/,' ') if @cleanup_zwnj
49
49
 
50
- # should fix spacing for () [] {} “” «»
51
- if @fix_spacing_for_braces_and_quotes
52
- text.gsub!(/\s*(\()\s*([^)]+?)\s*?(\))\s*/,' \1\2\3 ')
53
- text.gsub!(/\s*(\[)\s*([^)]+?)\s*?(\])\s*/,' \1\2\3 ')
54
- text.gsub!(/\s*(\{)\s*([^)]+?)\s*?(\})\s*/,' \1\2\3 ')
55
- text.gsub!(/\s*(“)\s*([^)]+?)\s*?(”)\s*/,' \1\2\3 ')
56
- text.gsub!(/\s*(«)\s*([^)]+?)\s*?(»)\s*/,' \1\2\3 ')
57
- end
58
-
59
50
  # character replacement
60
51
  persian_numbers = "۱۲۳۴۵۶۷۸۹۰"
61
52
  arabic_numbers = "١٢٣٤٥٦٧٨٩٠"
@@ -101,12 +92,25 @@ module Virastar
101
92
  # : ; , . ! ? and their persian equivalents should have one space after and no space before
102
93
  if @fix_spacing_for_braces_and_quotes
103
94
  text.gsub!(/[ ‌ ]*([:;,؛،.؟!]{1})[ ‌ ]*/, '\1 ')
95
+ # do not put space after colon that separates time parts
96
+ text.gsub!(/([۰-۹]+):\s+([۰-۹]+)/, '\1:\2')
97
+ end
98
+
99
+
100
+
101
+ # should fix spacing for () [] {} “” «»
102
+ if @fix_spacing_for_braces_and_quotes
103
+ text.gsub!(/\s*(\()\s*([^)]+?)\s*?(\))\s*/,' \1\2\3 ')
104
+ text.gsub!(/\s*(\[)\s*([^)]+?)\s*?(\])\s*/,' \1\2\3 ')
105
+ text.gsub!(/\s*(\{)\s*([^)]+?)\s*?(\})\s*/,' \1\2\3 ')
106
+ text.gsub!(/\s*(“)\s*([^)]+?)\s*?(”)\s*/,' \1\2\3 ')
107
+ text.gsub!(/\s*(«)\s*([^)]+?)\s*?(»)\s*/,' \1\2\3 ')
104
108
  end
105
109
 
106
110
  # should replace more than one space with just a single one
107
111
  if @cleanup_spacing
108
112
  text.gsub!(/[ ]+/,' ')
109
- #text.gsub!(/\s*[\n]+\s*/," \n")
113
+ text.gsub!(/([\n]+)[ ‌]*/,'\1')
110
114
  end
111
115
 
112
116
  # remove spaces, tabs, and new lines from the beginning and end of file
@@ -1,3 +1,3 @@
1
1
  module Virastar
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
@@ -145,19 +145,19 @@ describe Virastar do
145
145
  end
146
146
 
147
147
  it "should replace more that one line breaks with just one" do
148
- test = "this is \n \n \n \n a test"
149
- result = "this is \na test"
150
- test2 = "this is\n\n\n\na test"
151
- result2 = "this is \na test"
152
- test3 = "this is \n\n\n\n a test"
153
- result3 = "this is \na test"
148
+ test = "this is \n \n \n \n a test"
149
+ result = "this is \n\n\n\na test"
150
+ test2 = "this is\n\n\n\na test"
151
+ result2 = "this is\n\n\n\na test"
152
+ test3 = "this is \n\n\n a test"
153
+ result3 = "this is \n\n\na test"
154
154
 
155
155
  test.persian_cleanup.should == result
156
156
  test2.persian_cleanup.should == result2
157
157
  test3.persian_cleanup.should == result3
158
158
  end
159
159
 
160
- it "should not replace line breaks" do
160
+ it "should not replace line breaks and should remove spaces after line break" do
161
161
  test = "this is \n a test"
162
162
  result = "this is \na test"
163
163
  test.persian_cleanup.should == result
@@ -181,6 +181,30 @@ describe Virastar do
181
181
  test.persian_cleanup.should == result
182
182
  end
183
183
 
184
+ it "should not create spacing for something like (,)" do
185
+ test = "this is (,) comma"
186
+ result = "this is (،) comma"
187
+ test.persian_cleanup.should == result
188
+ end
189
+
190
+ it "should not puts space after time colon separator" do
191
+ test = "12:34"
192
+ result = "۱۲:۳۴"
193
+ test.persian_cleanup.should == result
194
+ end
195
+
196
+ it "should not destroy URLs"
197
+ # do
198
+ # test = "http://virastar.heroku.com"
199
+ # result = "http://virastar.heroku.com"
200
+ # test.persian_cleanup.should == result
201
+ #end
202
+
203
+ it "should not replace line breaks when the line ends with quotes" do
204
+ test = 'استفاده از "گيومه های فارسي"\nساده است'
205
+ result = 'استفاده از «گیومه‌های فارسی» \nساده است'
206
+ test.persian_cleanup.should == result
207
+ end
184
208
 
185
209
  context "aggressive editing" do
186
210
  it "should replace more than one ! or ? mark with just one" do
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: virastar
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 23
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 3
10
- version: 0.0.3
9
+ - 4
10
+ version: 0.0.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - Allen A. Bargi
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-01-20 00:00:00 +01:00
18
+ date: 2011-01-22 00:00:00 +01:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency