stringfu 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.markdown ADDED
@@ -0,0 +1,3 @@
1
+ # StringFu
2
+
3
+ StringFu is a small set of methods for cleaning up strings in preparation for Natural Language Processing (NLP).
@@ -1,3 +1,3 @@
1
1
  module Stringfu
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/lib/stringfu.rb CHANGED
@@ -1,5 +1,66 @@
1
1
  require "stringfu/version"
2
2
 
3
- module Stringfu
4
- # Your code goes here...
3
# StringFu: helpers for cleaning up strings ahead of Natural Language
# Processing (NLP) and for splitting them into word n-grams.
#
# The module is defined before String includes it (see the bottom of this
# block); including it first raises NameError because the StringFu constant
# does not exist yet at that point.
module StringFu
  # Punctuation characters that the punc_* methods strip or replace.
  PUNCTUATION = /[!@#\$%^&*;:,<.>?\/|+=\[\]"{}()_-]/

  # An apostrophe at the start of a line (in Ruby, ^ anchors at every line
  # start, not only at the start of the string).
  LEADING_APOSTROPHE = /^'/

  # Accessor-name prefixes used for the common n-gram sizes.
  NGRAM_PREFIXES = { 1 => "uni", 2 => "bi", 3 => "tri" }.freeze

  # Returns a copy with all punctuation and line-leading apostrophes removed.
  def punc_strip
    punc_gsub('')
  end

  # In-place version of #punc_strip. Returns self.
  def punc_strip!
    replace(punc_strip)
  end

  # Returns a copy in which the FIRST punctuation character, and then the
  # first line-leading apostrophe, is replaced by +pattern+.
  def punc_sub(pattern = ' ')
    sub(PUNCTUATION, pattern).sub(LEADING_APOSTROPHE, pattern)
  end

  # In-place version of #punc_sub. Returns self.
  def punc_sub!(pattern = ' ')
    replace(punc_sub(pattern))
  end

  # Returns a copy in which EVERY punctuation character and every
  # line-leading apostrophe is replaced by +pattern+.
  def punc_gsub(pattern = ' ')
    gsub(PUNCTUATION, pattern).gsub(LEADING_APOSTROPHE, pattern)
  end

  # In-place version of #punc_gsub. Returns self.
  def punc_gsub!(pattern = ' ')
    replace(punc_gsub(pattern))
  end

  # Builds word n-grams of the receiver (split on whitespace).
  #
  # Each argument is an n-gram size or a collection of sizes (e.g. a Range);
  # with no arguments every size from 1 up to the word count is used. Sizes
  # below 1 or above the word count are ignored.
  #
  # For each size the n-grams are stored in an instance variable on the
  # receiver -- @unigrams, @bigrams, @trigrams, or @_Ngrams for N > 3 -- and
  # a matching attr_accessor is defined on the receiver's class, so the
  # accessor methods become available on every instance of that class.
  #
  # Returns the sorted, de-duplicated list of sizes that were generated.
  def ngrams(*args)
    words = split
    args = [1..words.length] if args.empty?

    # Inspired by http://www.rubyquiz.com/quiz4.html
    sizes = args.map { |arg| Array(arg) }.flatten.uniq.sort
    sizes = sizes.select { |size| size > 0 && size <= words.length }

    sizes.each do |size|
      name = "#{latinfy(size)}grams"
      grams = words.each_cons(size).map { |gram| gram.join(" ") }
      instance_variable_set("@#{name}", grams)
      # __send__ keeps this working where attr_accessor is a private method
      # (it was private on older Ruby versions).
      self.class.__send__(:attr_accessor, name.to_sym)
    end
    sizes
  end

  private

  # Maps an n-gram size to its accessor prefix: "uni", "bi", "tri", or
  # "_<num>" for any other size.
  def latinfy(num)
    NGRAM_PREFIXES.fetch(num) { "_#{num}" }
  end
end

# Mix the helpers into every String.
class String
  include StringFu
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stringfu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-09-19 00:00:00.000000000Z
12
+ date: 2011-09-20 00:00:00.000000000Z
13
13
  dependencies: []
14
14
  description: Extending String with methods to stem, count, and clean words
15
15
  email:
@@ -20,6 +20,7 @@ extra_rdoc_files: []
20
20
  files:
21
21
  - .gitignore
22
22
  - Gemfile
23
+ - README.markdown
23
24
  - Rakefile
24
25
  - lib/stringfu.rb
25
26
  - lib/stringfu/version.rb