pristine_text 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 26c345b29f87af15925158a92b87951a401ccd1e
4
- data.tar.gz: f6bfa2f8ac0574daa87f37eb2f5634a3656686fa
3
+ metadata.gz: aaae999b5981fa51e5d31efeaf45c35e061b3e46
4
+ data.tar.gz: 345716e8f916ae88766fed3f342690ea7f8a18ff
5
5
  SHA512:
6
- metadata.gz: 124f737e2ed1d18e423fae1d12033767d657cb7b9c5281472baf2736e096efc41077b6515087a6498bae0114e861b48216924b365156fdb55d571f821fa01ec4
7
- data.tar.gz: 7d9320dd6e6443b376af26bcc74ef83812ef60f881281216e90358d2dfa301bc97fd90a026004d2028cd13934b96f2ab7a61d2434285bd9c555431fa5d8ac234
6
+ metadata.gz: 22f76bfcc47e5233a13ded3e9c82304c19c8b8656e02fd4e184e37ad0a3c12b95471b8fa3b877202659fe69a75c791e6f56f1019fed5b66dcbebb8cf966d6b7f
7
+ data.tar.gz: 99a2c9034af5efea3c4be578aecab883553350c210c6df78e1d54bbb3e28499c21b7a55dc7b0b1dd306be1640d2c2739f987f62c1283455382005bdefd29a563
data/lib/pristine_text.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require "pristine_text/version"
2
2
  require "open3"
3
3
  require "unicode_utils"
4
- require "cgi"
4
+ require "htmlentities"
5
5
 
6
6
  module PristineText
7
7
  def self.pipe(text, locale)
@@ -24,7 +24,7 @@ module PristineText
24
24
  end
25
25
 
26
26
  def self.clean(text, locale= :en, stem= true)
27
- text= UnicodeUtils.downcase(CGI.unescapeHTML(text).gsub(/ /, "\n"), locale).
27
+ text= UnicodeUtils.downcase(HTMLEntities.new.decode(text), locale).
28
28
  gsub(/[^\p{Letter}\s]+/, "").
29
29
  strip.squeeze
30
30
  if stem
@@ -1,3 +1,3 @@
1
1
  module PristineText
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
@@ -20,6 +20,7 @@ Gem::Specification.new do |spec|
20
20
 
21
21
  spec.add_development_dependency "bundler", "~> 1.7"
22
22
  spec.add_development_dependency "rake", "~> 10.0"
23
- spec.add_dependency 'unicode_utils', '~> 0'
23
+ spec.add_dependency 'unicode_utils', '~> 1.4'
24
+ spec.add_dependency 'htmlentities', '~> 4.3'
24
25
  spec.required_ruby_version= '~> 2.1'
25
26
  end
@@ -1,9 +1,10 @@
1
+ #encoding: UTF-8
1
2
  require "pristine_text"
2
3
  require "minitest/autorun"
3
4
 
4
5
  class PristineTextTest< Minitest::Unit::TestCase
5
6
  def test_clean
6
7
  assert_equal PristineText.clean("haberler geliyorlar gidiyorlar", :tr), "haber geliyor gidiyor"
7
- assert_equal PristineText.clean("}{ÜĞ09&nbsp;İŞi!'^+çö\n\t\v][';.,üğişçö&quot;", :tr), "üğ işiçö üğişçö"
8
+ assert_equal PristineText.clean("}{ÜĞ09&nbsp;İŞi!'^+çö\n\t\v][';.,üğişçö&quot;", :tr), "üğişiçö üğişçö"
8
9
  end
9
10
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pristine_text
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nurettin Onur TUĞCU
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-01 00:00:00.000000000 Z
11
+ date: 2014-11-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -44,14 +44,28 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '0'
47
+ version: '1.4'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
54
+ version: '1.4'
55
+ - !ruby/object:Gem::Dependency
56
+ name: htmlentities
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '4.3'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '4.3'
55
69
  description: This gem uses unicode_utils to lowercase text, removes non-letters, strips
56
70
  and squeezes whitespace, then optionally uses stemwords (from libstemming-tools)
57
71
  to stem every word.