virastar 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- virastar (0.0.1)
4
+ virastar (0.0.2)
5
5
 
6
6
  GEM
7
7
  remote: http://rubygems.org/
data/TODO ADDED
@@ -0,0 +1,12 @@
1
+ - do not put a space after : when it appears between numbers, e.g. the time 19:45 => ۱۹:۴۵
2
+ - spacing after , : ; causes a lot of problems, e.g. in this case: (,) => (, )
3
+ - do not destroy dots and colons in URLs
4
+
5
+ - translate to js
6
+
7
+
8
+ DONE:
9
+ - he yeh => hamzeh should consider arabic yeh and zwnj chars
10
+ - replacing quotes shouldn't be greedy
11
+ - for suffixes also consider tari
12
+ - (IMP) do not replace \n probably caused by fixing spaces after dots and commas
@@ -1,3 +1,3 @@
1
1
  module Virastar
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
data/lib/virastar.rb CHANGED
@@ -39,10 +39,10 @@ module Virastar
39
39
  text.gsub!(/\s*\.{3,}/,'…') if @fix_three_dots
40
40
 
41
41
  # replace English quotes with their Persian equivalent
42
- text.gsub!(/(["'`]+)(.+)(\1)/, '«\2»') if @fix_english_quotes
42
+ text.gsub!(/(["'`]+)(.+?)(\1)/, '«\2»') if @fix_english_quotes
43
43
 
44
44
  # should convert ه ی to هٔ
45
- text.gsub!(/(\S)(ه[\s‌])(\s)/, '\1هٔ\3') if @fix_hamzeh
45
+ text.gsub!(/(\S)(ه[\s‌]+[یي])(\s)/, '\1هٔ\3') if @fix_hamzeh
46
46
 
47
47
  # remove unnecessary zwnj char that are succeeded/preceded by a space
48
48
  text.gsub!(/\s+‌|‌\s+/,' ') if @cleanup_zwnj
@@ -80,7 +80,7 @@ module Virastar
80
80
  # put zwnj between word and suffix (*tar *tarin *ha *haye)
81
81
  # there's a possible bug here: های and تر could be separate nouns and not suffix
82
82
  if @fix_suffix_spacing
83
- text.gsub!(/\s+(تر(ین)?|ها(ی)?)\s+/,'‌\1 ')
83
+ text.gsub!(/\s+(تر(ی(ن)?)?|ها(ی)?)\s+/,'‌\1 ') # in case you can not read it: \s+(tar(i(n)?)?|ha(ye)?)\s+
84
84
  end
85
85
 
86
86
  # -- Aggressive Editing ------------------------------------------
@@ -100,13 +100,13 @@ module Virastar
100
100
 
101
101
  # : ; , . ! ? and their persian equivalents should have one space after and no space before
102
102
  if @fix_spacing_for_braces_and_quotes
103
- text.gsub!(/\s*([:;,؛،.؟!]{1})\s*/, '\1 ')
103
+ text.gsub!(/[ ‌ ]*([:;,؛،.؟!]{1})[ ‌ ]*/, '\1 ')
104
104
  end
105
105
 
106
106
  # should replace more than one space with just a single one
107
107
  if @cleanup_spacing
108
108
  text.gsub!(/[ ]+/,' ')
109
- text.gsub!(/\s*[\n]+\s*/," \n")
109
+ #text.gsub!(/\s*[\n]+\s*/," \n")
110
110
  end
111
111
 
112
112
  # remove spaces, tabs, and new lines from the beginning and end of file
@@ -47,8 +47,10 @@ describe Virastar do
47
47
  it "should correct :;,.?! spacing (one space after and no space before)" do
48
48
  test = "گفت : سلام"
49
49
  result = "گفت: سلام"
50
- #puts Diffy::Diff.new(test, result).to_s(:color) # TODO: char diff
50
+ test2 = "salam.\n\nkhoobi"
51
+ result2 = "salam. \n\nkhoobi"
51
52
  test.persian_cleanup.should == result
53
+ test2.persian_cleanup.should == result2
52
54
  end
53
55
 
54
56
  it "should replace English quotes with their Persian equivalent" do
@@ -59,11 +61,15 @@ describe Virastar do
59
61
  test5 = "``تست``"
60
62
  result = result2 = result4 = result5 = "«تست»"
61
63
  result3 = "«گفت: سلام»"
64
+ # not greedy
65
+ test6 = '"this" or "that"'
66
+ result6 = '«this» or «that»'
62
67
  test.persian_cleanup.should == result
63
68
  test2.persian_cleanup.should == result2
64
69
  test3.persian_cleanup.should == result3
65
70
  test4.persian_cleanup.should == result4
66
71
  test5.persian_cleanup.should == result5
72
+ test6.persian_cleanup.should == result6
67
73
  end
68
74
 
69
75
  it "should replace three dots with ellipsis" do
@@ -84,9 +90,11 @@ describe Virastar do
84
90
  it "should convert ه ی to هٔ" do
85
91
  test = "خانه ی ما"
86
92
  test2 = "خانه ی ما"
87
- result = result2 = "خانهٔ ما"
93
+ test3 = "خانه ي ما"
94
+ result = result2 = result3 = "خانهٔ ما"
88
95
  test.persian_cleanup.should == result
89
96
  test2.persian_cleanup.should == result2
97
+ test3.persian_cleanup.should == result3
90
98
  end
91
99
 
92
100
  it "should replace double dash to ndash and triple dash to mdash" do
@@ -162,8 +170,8 @@ describe Virastar do
162
170
  result2 = "ما نمی‌توانیم"
163
171
  test3 = "این بهترین کتاب ها است"
164
172
  result3 = "این بهترین کتاب‌ها است"
165
- test4 = "بزرگ تر و قدرتمند ترین زبان های دنیا"
166
- result4 = "بزرگ‌تر و قدرتمند‌ترین زبان‌های دنیا"
173
+ test4 = "بزرگ تری و قدرتمند ترین زبان های دنیا"
174
+ result4 = "بزرگ‌تری و قدرتمند‌ترین زبان‌های دنیا"
167
175
  test.persian_cleanup.should == result
168
176
  end
169
177
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: virastar
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 25
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 2
10
- version: 0.0.2
9
+ - 3
10
+ version: 0.0.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Allen A. Bargi
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-01-19 00:00:00 +01:00
18
+ date: 2011-01-20 00:00:00 +01:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -67,6 +67,7 @@ files:
67
67
  - LICENSE
68
68
  - README.md
69
69
  - Rakefile
70
+ - TODO
70
71
  - lib/virastar.rb
71
72
  - lib/virastar/version.rb
72
73
  - spec/spec_helper.rb