pristine_text 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 03ce1c942c7e32460cf0c312a5d897a205f4a2a7
4
- data.tar.gz: 67916011e5695da1ec0c521e5f693215f3c82238
3
+ metadata.gz: 26c345b29f87af15925158a92b87951a401ccd1e
4
+ data.tar.gz: f6bfa2f8ac0574daa87f37eb2f5634a3656686fa
5
5
  SHA512:
6
- metadata.gz: c43004bf30849ab031a6c314a52bad0b863c1591304c082e5761dccc55f0bf658e9ea855a6a46e03c6e35412355c62347c494c2b4f8f25ec30a21a2dce10d830
7
- data.tar.gz: 43a0bbcb713ab367ccd531448a82a101de22af9f85fe6fdd5acf2556918533cb647ad575f2af790139cdfe89a62f1408486f124d675e236e0129ae886522d015
6
+ metadata.gz: 124f737e2ed1d18e423fae1d12033767d657cb7b9c5281472baf2736e096efc41077b6515087a6498bae0114e861b48216924b365156fdb55d571f821fa01ec4
7
+ data.tar.gz: 7d9320dd6e6443b376af26bcc74ef83812ef60f881281216e90358d2dfa301bc97fd90a026004d2028cd13934b96f2ab7a61d2434285bd9c555431fa5d8ac234
data/.gitignore CHANGED
@@ -12,3 +12,4 @@
12
12
  *.o
13
13
  *.a
14
14
  mkmf.log
15
+ *.gem
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # PristineText
2
2
 
3
- This gem uses unicode_utils to lowercase text, removes non-letters, strips and squeezes whitespace, then optionally stemwords (from stemming-tools) to stem every word.
3
+ This gem uses unicode_utils to lowercase text, removes non-letters, strips and squeezes whitespace, then optionally uses stemwords (from libstemming-tools) to stem every word.
4
4
 
5
5
  ## Installation
6
6
 
@@ -20,9 +20,9 @@ Or install it yourself as:
20
20
 
21
21
  ## Usage
22
22
 
23
- require "pristine_text"
23
+ require "pristine_text"
24
24
 
25
- puts PristineText.clean("haberler geliyorlar gidiyorlar", :tr)
25
+ puts PristineText.clean("haberler geliyorlar gidiyorlar", :tr)
26
26
 
27
27
  ## Contributing
28
28
 
@@ -1,3 +1,3 @@
1
1
  module PristineText
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/lib/pristine_text.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require "pristine_text/version"
2
2
  require "open3"
3
3
  require "unicode_utils"
4
+ require "cgi"
4
5
 
5
6
  module PristineText
6
7
  def self.pipe(text, locale)
@@ -23,7 +24,7 @@ module PristineText
23
24
  end
24
25
 
25
26
  def self.clean(text, locale= :en, stem= true)
26
- text= UnicodeUtils.downcase(text, locale).
27
+ text= UnicodeUtils.downcase(CGI.unescapeHTML(text).gsub(/ /, "\n"), locale).
27
28
  gsub(/[^\p{Letter}\s]+/, "").
28
29
  strip.squeeze
29
30
  if stem
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["Nurettin Onur TUĞCU"]
10
10
  spec.email = ["onurtugcu@gmail.com"]
11
11
  spec.summary = %q{Lowercase, squeeze, stem text.}
12
- spec.description = %q{This gem uses unicode_utils to lowercase text, removes non-letters, strips and squeezes whitespace, then optionally stemwords (from stemming-tools) to stem every word.}
12
+ spec.description = %q{This gem uses unicode_utils to lowercase text, removes non-letters, strips and squeezes whitespace, then optionally uses stemwords (from libstemming-tools) to stem every word.}
13
13
  spec.homepage = "https://github.com/nurettin/pristine_text"
14
14
  spec.license = "MIT"
15
15
 
@@ -0,0 +1,9 @@
1
+ require "pristine_text"
2
+ require "minitest/autorun"
3
+
4
+ class PristineTextTest< Minitest::Unit::TestCase
5
+ def test_clean
6
+ assert_equal PristineText.clean("haberler geliyorlar gidiyorlar", :tr), "haber geliyor gidiyor"
7
+ assert_equal PristineText.clean("}{ÜĞ09&nbsp;İŞi!'^+çö\n\t\v][';.,üğişçö&quot;", :tr), "üğ işiçö üğişçö"
8
+ end
9
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pristine_text
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nurettin Onur TUĞCU
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-31 00:00:00.000000000 Z
11
+ date: 2014-11-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -53,8 +53,8 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  description: This gem uses unicode_utils to lowercase text, removes non-letters, strips
56
- and squeezes whitespace, then optionally stemwords (from stemming-tools) to stem
57
- every word.
56
+ and squeezes whitespace, then optionally uses stemwords (from libstemming-tools)
57
+ to stem every word.
58
58
  email:
59
59
  - onurtugcu@gmail.com
60
60
  executables: []
@@ -69,6 +69,7 @@ files:
69
69
  - lib/pristine_text.rb
70
70
  - lib/pristine_text/version.rb
71
71
  - pristine_text.gemspec
72
+ - test/pristine_text.rb
72
73
  homepage: https://github.com/nurettin/pristine_text
73
74
  licenses:
74
75
  - MIT
@@ -89,8 +90,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
89
90
  version: '0'
90
91
  requirements: []
91
92
  rubyforge_project:
92
- rubygems_version: 2.2.2
93
+ rubygems_version: 2.4.2
93
94
  signing_key:
94
95
  specification_version: 4
95
96
  summary: Lowercase, squeeze, stem text.
96
- test_files: []
97
+ test_files:
98
+ - test/pristine_text.rb