stringfu 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.markdown ADDED
@@ -0,0 +1,3 @@
1
+ # StringFu
2
+
3
+ StringFu is a small set of methods for manipulating strings so that they are cleaned up for Natural Language Processing (NLP).
@@ -1,3 +1,3 @@
1
1
  module Stringfu
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/lib/stringfu.rb CHANGED
@@ -1,5 +1,66 @@
1
1
  require "stringfu/version"
2
2
 
3
- module Stringfu
4
- # Your code goes here...
3
# StringFu provides punctuation-stripping and n-gram helpers that are mixed
# into String (see the `class String` reopening at the bottom of this block).
#
# The module is defined BEFORE it is included: including it first raises
# NameError at load time because the constant does not exist yet.
module StringFu
  # Characters the punc_* methods treat as punctuation. The apostrophe is not
  # in this set; it is handled separately by LEADING_APOSTROPHE_PATTERN.
  PUNCTUATION_PATTERN = /[!@#\$%^&*;:,<.>?\/|+=\[\]"{}()_-]/

  # An apostrophe at the start of a line. `^` is a line anchor in Ruby, so in
  # a multi-line string this matches at the start of every line.
  LEADING_APOSTROPHE_PATTERN = /^'/

  # Names used for the generated n-gram accessors (unigrams, bigrams,
  # trigrams). Other sizes fall back to "_<n>", e.g. `_4grams`.
  NGRAM_PREFIXES = { 1 => "uni", 2 => "bi", 3 => "tri" }.freeze

  # Returns a copy of the string with all punctuation and any line-leading
  # apostrophes removed.
  def punc_strip
    gsub(PUNCTUATION_PATTERN, '').gsub(LEADING_APOSTROPHE_PATTERN, '')
  end

  # In-place version of #punc_strip. Always returns self (unlike the stdlib
  # bang methods, it never returns nil when nothing changed).
  def punc_strip!
    replace(punc_strip)
  end

  # Returns a copy with the FIRST punctuation character and the first
  # line-leading apostrophe each replaced by +pattern+.
  #
  # pattern - replacement text (default: a single space).
  def punc_sub(pattern = ' ')
    sub(PUNCTUATION_PATTERN, pattern).sub(LEADING_APOSTROPHE_PATTERN, pattern)
  end

  # In-place version of #punc_sub. Always returns self.
  def punc_sub!(pattern = ' ')
    replace(punc_sub(pattern))
  end

  # Returns a copy with EVERY punctuation character and every line-leading
  # apostrophe replaced by +pattern+.
  #
  # pattern - replacement text (default: a single space).
  def punc_gsub(pattern = ' ')
    gsub(PUNCTUATION_PATTERN, pattern).gsub(LEADING_APOSTROPHE_PATTERN, pattern)
  end

  # In-place version of #punc_gsub. Always returns self.
  def punc_gsub!(pattern = ' ')
    replace(punc_gsub(pattern))
  end

  # Builds word n-grams for the receiver and exposes each size through an
  # accessor on the receiver's class (`unigrams`, `bigrams`, `trigrams`,
  # `_4grams`, ...).
  #
  # args - any mix of Integers and Ranges naming the gram sizes wanted. With
  #        no arguments every size from 1 to the word count is generated.
  #        Sizes below 1 or above the word count are silently dropped.
  #
  # Returns the sorted, de-duplicated Array of sizes that were generated (not
  # the grams themselves; read those through the accessors).
  #
  # Note: the grams are stored in instance variables on the string, so the
  # receiver must not be frozen.
  #
  # Inspired by http://www.rubyquiz.com/quiz4.html
  def ngrams(*args)
    words = split
    args = [1..words.length] if args.empty?

    # Expand ranges into individual sizes, then keep only the usable ones.
    sizes = args.map { |arg| Array(arg) }.flatten.uniq.sort
    sizes = sizes.select { |size| size > 0 && size <= words.length }

    sizes.each do |size|
      name = "#{latinfy(size)}grams"
      grams = words.each_cons(size).map { |slice| slice.join(" ") }
      instance_variable_set("@#{name}", grams)
      # Define reader/writer methods on the class so the new instance
      # variable is reachable as e.g. `str.bigrams`.
      self.class.__send__(:attr_accessor, name.to_sym)
    end

    sizes
  end

  private

  # Maps a gram size to its accessor prefix: "uni", "bi" or "tri" for 1-3,
  # otherwise "_<num>".
  def latinfy(num)
    NGRAM_PREFIXES.fetch(num) { "_#{num}" }
  end
end

# Mix the helpers into every String.
class String
  include StringFu
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stringfu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-09-19 00:00:00.000000000Z
12
+ date: 2011-09-20 00:00:00.000000000Z
13
13
  dependencies: []
14
14
  description: Extending String with methods to stem, count, and clean words
15
15
  email:
@@ -20,6 +20,7 @@ extra_rdoc_files: []
20
20
  files:
21
21
  - .gitignore
22
22
  - Gemfile
23
+ - README.markdown
23
24
  - Rakefile
24
25
  - lib/stringfu.rb
25
26
  - lib/stringfu/version.rb