virastar 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- virastar (0.0.1)
4
+ virastar (0.0.2)
5
5
 
6
6
  GEM
7
7
  remote: http://rubygems.org/
data/TODO ADDED
@@ -0,0 +1,12 @@
1
+ - do not put space after : in the context of numbers like hour 19:45 => ۱۹:۴۵
2
+ - spacing after , : ; causes a lot of problems, e.g. in this case: (,) => (, )
3
+ - do not destroy dots and colons in URLs
4
+
5
+ - translate to js
6
+
7
+
8
+ DONE:
9
+ - he yeh => hamzeh should consider arabic yeh and zwnj chars
10
+ - replacing quotes shouldn't be greedy
11
+ - for suffixes also consider tari
12
+ - (IMP) do not replace \n probably caused by fixing spaces after dots and commas
@@ -1,3 +1,3 @@
1
1
  module Virastar
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
data/lib/virastar.rb CHANGED
@@ -39,10 +39,10 @@ module Virastar
39
39
  text.gsub!(/\s*\.{3,}/,'…') if @fix_three_dots
40
40
 
41
41
  # replace English quotes with their Persian equivalent
42
- text.gsub!(/(["'`]+)(.+)(\1)/, '«\2»') if @fix_english_quotes
42
+ text.gsub!(/(["'`]+)(.+?)(\1)/, '«\2»') if @fix_english_quotes
43
43
 
44
44
  # should convert ه ی to هٔ
45
- text.gsub!(/(\S)(ه[\s‌])(\s)/, '\1هٔ\3') if @fix_hamzeh
45
+ text.gsub!(/(\S)(ه[\s‌]+[یي])(\s)/, '\1هٔ\3') if @fix_hamzeh
46
46
 
47
47
  # remove unnecessary zwnj chars that are followed/preceded by a space
48
48
  text.gsub!(/\s+‌|‌\s+/,' ') if @cleanup_zwnj
@@ -80,7 +80,7 @@ module Virastar
80
80
  # put zwnj between word and suffix (*tar *tarin *ha *haye)
81
81
  # there's a possible bug here: های and تر could be separate nouns and not suffixes
82
82
  if @fix_suffix_spacing
83
- text.gsub!(/\s+(تر(ین)?|ها(ی)?)\s+/,'‌\1 ')
83
+ text.gsub!(/\s+(تر(ی(ن)?)?|ها(ی)?)\s+/,'‌\1 ') # in case you can not read it: \s+(tar(i(n)?)?|ha(ye)?)\s+
84
84
  end
85
85
 
86
86
  # -- Aggressive Editing ------------------------------------------
@@ -100,13 +100,13 @@ module Virastar
100
100
 
101
101
  # : ; , . ! ? and their persian equivalents should have one space after and no space before
102
102
  if @fix_spacing_for_braces_and_quotes
103
- text.gsub!(/\s*([:;,؛،.؟!]{1})\s*/, '\1 ')
103
+ text.gsub!(/[ ‌ ]*([:;,؛،.؟!]{1})[ ‌ ]*/, '\1 ')
104
104
  end
105
105
 
106
106
  # should replace more than one space with just a single one
107
107
  if @cleanup_spacing
108
108
  text.gsub!(/[ ]+/,' ')
109
- text.gsub!(/\s*[\n]+\s*/," \n")
109
+ #text.gsub!(/\s*[\n]+\s*/," \n")
110
110
  end
111
111
 
112
112
  # remove spaces, tabs, and new lines from the beginning and end of file
@@ -47,8 +47,10 @@ describe Virastar do
47
47
  it "should correct :;,.?! spacing (one space after and no space before)" do
48
48
  test = "گفت : سلام"
49
49
  result = "گفت: سلام"
50
- #puts Diffy::Diff.new(test, result).to_s(:color) # TODO: char diff
50
+ test2 = "salam.\n\nkhoobi"
51
+ result2 = "salam. \n\nkhoobi"
51
52
  test.persian_cleanup.should == result
53
+ test2.persian_cleanup.should == result2
52
54
  end
53
55
 
54
56
  it "should replace English quotes with their Persian equivalent" do
@@ -59,11 +61,15 @@ describe Virastar do
59
61
  test5 = "``تست``"
60
62
  result = result2 = result4 = result5 = "«تست»"
61
63
  result3 = "«گفت: سلام»"
64
+ # not greedy
65
+ test6 = '"this" or "that"'
66
+ result6 = '«this» or «that»'
62
67
  test.persian_cleanup.should == result
63
68
  test2.persian_cleanup.should == result2
64
69
  test3.persian_cleanup.should == result3
65
70
  test4.persian_cleanup.should == result4
66
71
  test5.persian_cleanup.should == result5
72
+ test6.persian_cleanup.should == result6
67
73
  end
68
74
 
69
75
  it "should replace three dots with ellipsis" do
@@ -84,9 +90,11 @@ describe Virastar do
84
90
  it "should convert ه ی to هٔ" do
85
91
  test = "خانه ی ما"
86
92
  test2 = "خانه ی ما"
87
- result = result2 = "خانهٔ ما"
93
+ test3 = "خانه ي ما"
94
+ result = result2 = result3 = "خانهٔ ما"
88
95
  test.persian_cleanup.should == result
89
96
  test2.persian_cleanup.should == result2
97
+ test3.persian_cleanup.should == result3
90
98
  end
91
99
 
92
100
  it "should replace double dash to ndash and triple dash to mdash" do
@@ -162,8 +170,8 @@ describe Virastar do
162
170
  result2 = "ما نمی‌توانیم"
163
171
  test3 = "این بهترین کتاب ها است"
164
172
  result3 = "این بهترین کتاب‌ها است"
165
- test4 = "بزرگ تر و قدرتمند ترین زبان های دنیا"
166
- result4 = "بزرگ‌تر و قدرتمند‌ترین زبان‌های دنیا"
173
+ test4 = "بزرگ تری و قدرتمند ترین زبان های دنیا"
174
+ result4 = "بزرگ‌تری و قدرتمند‌ترین زبان‌های دنیا"
167
175
  test.persian_cleanup.should == result
168
176
  end
169
177
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: virastar
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 25
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 2
10
- version: 0.0.2
9
+ - 3
10
+ version: 0.0.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Allen A. Bargi
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-01-19 00:00:00 +01:00
18
+ date: 2011-01-20 00:00:00 +01:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -67,6 +67,7 @@ files:
67
67
  - LICENSE
68
68
  - README.md
69
69
  - Rakefile
70
+ - TODO
70
71
  - lib/virastar.rb
71
72
  - lib/virastar/version.rb
72
73
  - spec/spec_helper.rb