virastar 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/TODO CHANGED
@@ -1,10 +1 @@
1
- - destories /n if the line ends with ""
2
- - do not destroy urls dots and colons
3
- /https?://([-\w\.]+)+(:\d+)?(/([\w/_\.]*(\?\S+)?)?)?/
4
-
5
- - translate to js
6
-
7
-
8
- DONE:
9
- - spacing after , : ; causing a lot of problem in this case (,) => (, )
10
- - do not put space after : in the context of numbers like hour 19:45 => ۱۹:۴۵
1
+ - translate to js
@@ -1,3 +1,3 @@
1
1
  module Virastar
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
data/lib/virastar.rb CHANGED
@@ -29,6 +29,16 @@ module Virastar
29
29
 
30
30
  def cleanup
31
31
  text = @text
32
+
33
+ # remove URLs here and bring them back at the end of the process
34
+ urls = []
35
+ i = 0
36
+ text.gsub!(/https?:\/\/([-\w\.]+)+(:\d+)?(\/([\w\/_\.]*(\?\S+)?)?)?/) do |s|
37
+ urls[i] = s.dup
38
+ i += 1
39
+ "__urls__#{i}__"
40
+ end
41
+
32
42
  # replace double dash to ndash and triple dash to mdash
33
43
  if @fix_dashes
34
44
  text.gsub!(/-{3}/,'—')
@@ -89,33 +99,45 @@ module Virastar
89
99
  end
90
100
  # ----------------------------------------------------------------
91
101
 
102
+ # should fix outside and inside spacing for () [] {} “” «»
103
+ if @fix_spacing_for_braces_and_quotes
104
+ text.gsub!(/[ ‌]*(\()\s*([^)]+?)\s*?(\))[ ‌]*/,' \1\2\3 ')
105
+ text.gsub!(/[ ‌]*(\[)\s*([^)]+?)\s*?(\])[ ‌]*/,' \1\2\3 ')
106
+ text.gsub!(/[ ‌]*(\{)\s*([^)]+?)\s*?(\})[ ‌]*/,' \1\2\3 ')
107
+ text.gsub!(/[ ‌]*(“)\s*([^)]+?)\s*?(”)[ ‌]*/,' \1\2\3 ')
108
+ text.gsub!(/[ ‌]*(«)\s*([^)]+?)\s*?(»)[ ‌]*/,' \1\2\3 ')
109
+ end
110
+
92
111
  # : ; , . ! ? and their Persian equivalents should have one space after and no space before
93
112
  if @fix_spacing_for_braces_and_quotes
94
- text.gsub!(/[ ‌ ]*([:;,؛،.؟!]{1})[ ‌ ]*/, '\1 ')
113
+ text.gsub!(/[ ‌ ]*([:;,؛،.؟!]{1})[ ‌ ]*/, '\1 ')
95
114
  # do not put space after colon that separates time parts
96
115
  text.gsub!(/([۰-۹]+):\s+([۰-۹]+)/, '\1:\2')
97
116
  end
98
-
99
-
100
117
 
101
- # should fix spacing for () [] {} “” «»
118
+ # should fix inside spacing for () [] {} “” «»
102
119
  if @fix_spacing_for_braces_and_quotes
103
- text.gsub!(/\s*(\()\s*([^)]+?)\s*?(\))\s*/,' \1\2\3 ')
104
- text.gsub!(/\s*(\[)\s*([^)]+?)\s*?(\])\s*/,' \1\2\3 ')
105
- text.gsub!(/\s*(\{)\s*([^)]+?)\s*?(\})\s*/,' \1\2\3 ')
106
- text.gsub!(/\s*(“)\s*([^)]+?)\s*?(”)\s*/,' \1\2\3 ')
107
- text.gsub!(/\s*(«)\s*([^)]+?)\s*?(»)\s*/,' \1\2\3 ')
120
+ text.gsub!(/(\()\s*([^)]+?)\s*?(\))/,'\1\2\3')
121
+ text.gsub!(/(\[)\s*([^)]+?)\s*?(\])/,'\1\2\3')
122
+ text.gsub!(/(\{)\s*([^)]+?)\s*?(\})/,'\1\2\3')
123
+ text.gsub!(/(“)\s*([^)]+?)\s*?(”)/,'\1\2\3')
124
+ text.gsub!(/(«)\s*([^)]+?)\s*?(»)/,'\1\2\3')
108
125
  end
109
126
 
110
127
  # should replace more than one space with just a single one
111
128
  if @cleanup_spacing
112
129
  text.gsub!(/[ ]+/,' ')
113
- text.gsub!(/([\n]+)[ ‌]*/,'\1')
130
+ text.gsub!(/([\n]+)[ ‌]*/,'\1')
114
131
  end
115
132
 
116
133
  # remove spaces, tabs, and new lines from the beginning and end of file
117
134
  text.strip! if @cleanup_begin_and_end
118
135
 
136
+ # bring back the URLs by replacing each placeholder with its original URL
137
+ text.gsub!(/__urls__\d+__/) do |s|
138
+ urls[s.split("__").last.to_i - 1]
139
+ end
140
+
119
141
  text
120
142
  end
121
143
 
@@ -193,16 +193,24 @@ describe Virastar do
193
193
  test.persian_cleanup.should == result
194
194
  end
195
195
 
196
- it "should not destroy URLs"
197
- # do
198
- # test = "http://virastar.heroku.com"
199
- # result = "http://virastar.heroku.com"
200
- # test.persian_cleanup.should == result
201
- #end
196
+ it "should not destroy URLs" do
197
+ test = "http://virastar.heroku.com"
198
+ result = "http://virastar.heroku.com"
199
+ test2 = "http://virastar.heroku.com\nhttp://balatarin.com"
200
+ result2 = "http://virastar.heroku.com\nhttp://balatarin.com"
201
+ test.persian_cleanup.should == result
202
+ test2.persian_cleanup.should == result2
203
+ end
202
204
 
203
205
  it "should not replace line breaks when the line ends with quotes" do
204
- test = 'استفاده از "گيومه های فارسي"\nساده است'
205
- result = 'استفاده از «گیومه‌های فارسی» \nساده است'
206
+ test = "salam \"khoobi\" \n chetori"
207
+ result = "salam «khoobi» \nchetori"
208
+ test.persian_cleanup.should == result
209
+ end
210
+
211
+ it "should not put space after quotes, {}, () or [] if there's ,.; just after that" do
212
+ test = "«This», {this}, (this), [this] or {this}. sometimes (this)."
213
+ result = "«This»، {this}، (this)، [this] or {this}. sometimes (this)."
206
214
  test.persian_cleanup.should == result
207
215
  end
208
216
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: virastar
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 21
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 4
10
- version: 0.0.4
9
+ - 5
10
+ version: 0.0.5
11
11
  platform: ruby
12
12
  authors:
13
13
  - Allen A. Bargi
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-01-22 00:00:00 +01:00
18
+ date: 2011-01-23 00:00:00 +01:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency