virastar 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
data/TODO CHANGED
@@ -1,10 +1 @@
1
- - destories /n if the line ends with ""
2
- - do not destroy urls dots and colons
3
- /https?://([-\w\.]+)+(:\d+)?(/([\w/_\.]*(\?\S+)?)?)?/
4
-
5
- - translate to js
6
-
7
-
8
- DONE:
9
- - spacing after , : ; causing a lot of problem in this case (,) => (, )
10
- - do not put space after : in the context of numbers like hour 19:45 => ۱۹:۴۵
1
+ - translate to js
@@ -1,3 +1,3 @@
1
1
  module Virastar
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
data/lib/virastar.rb CHANGED
@@ -29,6 +29,16 @@ module Virastar
29
29
 
30
30
  def cleanup
31
31
  text = @text
32
+
33
+ # removing URLs; they are brought back at the end of the process
34
+ urls = []
35
+ i = 0
36
+ text.gsub!(/https?:\/\/([-\w\.]+)+(:\d+)?(\/([\w\/_\.]*(\?\S+)?)?)?/) do |s|
37
+ urls[i] = s.dup
38
+ i += 1
39
+ "__urls__#{i}__"
40
+ end
41
+
32
42
  # replace double dash to ndash and triple dash to mdash
33
43
  if @fix_dashes
34
44
  text.gsub!(/-{3}/,'—')
@@ -89,33 +99,45 @@ module Virastar
89
99
  end
90
100
  # ----------------------------------------------------------------
91
101
 
102
+ # should fix outside and inside spacing for () [] {} “” «»
103
+ if @fix_spacing_for_braces_and_quotes
104
+ text.gsub!(/[ ‌]*(\()\s*([^)]+?)\s*?(\))[ ‌]*/,' \1\2\3 ')
105
+ text.gsub!(/[ ‌]*(\[)\s*([^)]+?)\s*?(\])[ ‌]*/,' \1\2\3 ')
106
+ text.gsub!(/[ ‌]*(\{)\s*([^)]+?)\s*?(\})[ ‌]*/,' \1\2\3 ')
107
+ text.gsub!(/[ ‌]*(“)\s*([^)]+?)\s*?(”)[ ‌]*/,' \1\2\3 ')
108
+ text.gsub!(/[ ‌]*(«)\s*([^)]+?)\s*?(»)[ ‌]*/,' \1\2\3 ')
109
+ end
110
+
92
111
  # : ; , . ! ? and their persian equivalents should have one space after and no space before
93
112
  if @fix_spacing_for_braces_and_quotes
94
- text.gsub!(/[ ‌ ]*([:;,؛،.؟!]{1})[ ‌ ]*/, '\1 ')
113
+ text.gsub!(/[ ‌ ]*([:;,؛،.؟!]{1})[ ‌ ]*/, '\1 ')
95
114
  # do not put space after colon that separates time parts
96
115
  text.gsub!(/([۰-۹]+):\s+([۰-۹]+)/, '\1:\2')
97
116
  end
98
-
99
-
100
117
 
101
- # should fix spacing for () [] {} “” «»
118
+ # should fix inside spacing for () [] {} “” «»
102
119
  if @fix_spacing_for_braces_and_quotes
103
- text.gsub!(/\s*(\()\s*([^)]+?)\s*?(\))\s*/,' \1\2\3 ')
104
- text.gsub!(/\s*(\[)\s*([^)]+?)\s*?(\])\s*/,' \1\2\3 ')
105
- text.gsub!(/\s*(\{)\s*([^)]+?)\s*?(\})\s*/,' \1\2\3 ')
106
- text.gsub!(/\s*(“)\s*([^)]+?)\s*?(”)\s*/,' \1\2\3 ')
107
- text.gsub!(/\s*(«)\s*([^)]+?)\s*?(»)\s*/,' \1\2\3 ')
120
+ text.gsub!(/(\()\s*([^)]+?)\s*?(\))/,'\1\2\3')
121
+ text.gsub!(/(\[)\s*([^)]+?)\s*?(\])/,'\1\2\3')
122
+ text.gsub!(/(\{)\s*([^)]+?)\s*?(\})/,'\1\2\3')
123
+ text.gsub!(/(“)\s*([^)]+?)\s*?(”)/,'\1\2\3')
124
+ text.gsub!(/(«)\s*([^)]+?)\s*?(»)/,'\1\2\3')
108
125
  end
109
126
 
110
127
  # should replace more than one space with just a single one
111
128
  if @cleanup_spacing
112
129
  text.gsub!(/[ ]+/,' ')
113
- text.gsub!(/([\n]+)[ ‌]*/,'\1')
130
+ text.gsub!(/([\n]+)[ ‌]*/,'\1')
114
131
  end
115
132
 
116
133
  # remove spaces, tabs, and new lines from the beginning and end of file
117
134
  text.strip! if @cleanup_begin_and_end
118
135
 
136
+ # bringing back urls
137
+ text.gsub!(/__urls__\d+__/) do |s|
138
+ urls[s.split("__").last.to_i - 1]
139
+ end
140
+
119
141
  text
120
142
  end
121
143
 
@@ -193,16 +193,24 @@ describe Virastar do
193
193
  test.persian_cleanup.should == result
194
194
  end
195
195
 
196
- it "should not destroy URLs"
197
- # do
198
- # test = "http://virastar.heroku.com"
199
- # result = "http://virastar.heroku.com"
200
- # test.persian_cleanup.should == result
201
- #end
196
+ it "should not destroy URLs" do
197
+ test = "http://virastar.heroku.com"
198
+ result = "http://virastar.heroku.com"
199
+ test2 = "http://virastar.heroku.com\nhttp://balatarin.com"
200
+ result2 = "http://virastar.heroku.com\nhttp://balatarin.com"
201
+ test.persian_cleanup.should == result
202
+ test2.persian_cleanup.should == result2
203
+ end
202
204
 
203
205
  it "should not replace line breaks when the line ends with quotes" do
204
- test = 'استفاده از "گيومه های فارسي"\nساده است'
205
- result = 'استفاده از «گیومه‌های فارسی» \nساده است'
206
+ test = "salam \"khoobi\" \n chetori"
207
+ result = "salam «khoobi» \nchetori"
208
+ test.persian_cleanup.should == result
209
+ end
210
+
211
+ it "should not put space after quotes, {}, () or [] if there's ,.; just after that" do
212
+ test = "«This», {this}, (this), [this] or {this}. sometimes (this)."
213
+ result = "«This»، {this}، (this)، [this] or {this}. sometimes (this)."
206
214
  test.persian_cleanup.should == result
207
215
  end
208
216
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: virastar
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 21
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 4
10
- version: 0.0.4
9
+ - 5
10
+ version: 0.0.5
11
11
  platform: ruby
12
12
  authors:
13
13
  - Allen A. Bargi
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-01-22 00:00:00 +01:00
18
+ date: 2011-01-23 00:00:00 +01:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency