stringfu 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -2,3 +2,5 @@ source "http://rubygems.org"
2
2
 
3
3
  # Specify your gem's dependencies in stringfu.gemspec
4
4
  gemspec
5
+ gem "uea-stemmer"
6
+
data/Gemfile.lock ADDED
@@ -0,0 +1,16 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ stringfu (0.0.3)
5
+
6
+ GEM
7
+ remote: http://rubygems.org/
8
+ specs:
9
+ uea-stemmer (0.10.1)
10
+
11
+ PLATFORMS
12
+ ruby
13
+
14
+ DEPENDENCIES
15
+ stringfu!
16
+ uea-stemmer
data/README.markdown CHANGED
@@ -1,3 +1,36 @@
1
1
  # StringFu
2
2
 
3
3
  StringFu is a small set of methods for cleaning up strings for Natural Language Processing (NLP).
4
+
5
+ ## Install StringFu
6
+
7
+ gem install stringfu
8
+
9
+ ## Usage
10
+
11
+ wtf = 'Charlie the Unicorn is a potty mouth. He\'ll say things like, @!@#% !@$%$[@#$^!)'
12
+
13
+ punc_strip will strip away all standard punctuation.
14
+
15
+ wtf.punc_strip # => "Charlie the Unicorn is a potty mouth He'll say things like "
16
+
17
+ punc_gsub will replace punctuation with spaces if given no arguments
18
+
19
+ wtf.punc_gsub # => "Charlie the Unicorn is a potty mouth He'll say things like "
20
+
21
+ punc_gsub can also take arguments
22
+
23
+ wtf.punc_gsub "?" # => "Charlie the Unicorn is a potty mouth? He'll say things like? ????? ????????????"
24
+
25
+ ngrams will generate n-grams for any string and return an array of the n-gram sizes that were generated
26
+
27
+ ftw = "I choose Whoppie Goldberg for the Win!"
28
+ ftw.ngrams # => [1, 2, 3, 4, 5, 6, 7]
29
+
30
+ ftw.unigrams # => ["I", "choose", "Whoppie", "Goldberg", "for", "the", "Win!"]
31
+ ftw.bigrams # => ["I choose", "choose Whoppie", "Whoppie Goldberg", "Goldberg for", "for the", "the Win!"]
32
+ ftw.trigrams # => ["I choose Whoppie", "choose Whoppie Goldberg", "Whoppie Goldberg for", "Goldberg for the", "for the Win!"]
33
+ ftw._4grams # => ["I choose Whoppie Goldberg", "choose Whoppie Goldberg for", "Whoppie Goldberg for the", "Goldberg for the Win!"]
34
+
35
+ ftw.ngrams 3..6 # => [3, 4, 5, 6]
36
+ ftw.ngrams 2, 7, 3..4 # => [2, 3, 4, 7]
@@ -1,3 +1,3 @@
1
1
  module Stringfu
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
data/lib/stringfu.rb CHANGED
@@ -1,4 +1,8 @@
1
- require "stringfu/version"
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+
4
+ require 'stringfu/version'
5
+ require 'uea-stemmer'
2
6
 
3
7
  STOP = ["a", "able", "about", "across", "after", "all", "almost", "also", "am", "among", "an", "and", "any", "are", "as", "at", "be", "because", "been", "but", "by", "can", "cannot", "could", "dear", "did", "do", "does", "either", "else", "ever", "every", "for", "from", "get", "got", "had", "has", "have", "he", "her", "hers", "him", "his", "how", "however", "i", "if", "in", "into", "is", "it", "its", "just", "least", "let", "like", "likely", "may", "me", "might", "most", "must", "my", "neither", "no", "nor", "not", "of", "off", "often", "on", "only", "or", "other", "our", "own", "rather", "said", "say", "says", "she", "should", "since", "so", "some", "than", "that", "the", "their", "them", "then", "there", "these", "they", "this", "tis", "to", "too", "twas", "us", "wants", "was", "we", "were", "what", "when", "where", "which", "while", "who", "whom", "why", "will", "with", "would", "yet", "you", "your"]
4
8
 
@@ -45,6 +49,7 @@ module Stringfu
45
49
  args.each do |num|
46
50
  prefix = latinfy num
47
51
  self.instance_variable_set "@#{prefix}grams", []
52
+
48
53
  rounds = wordphrase.length - num + 1
49
54
  rounds.times do |iter|
50
55
  self.instance_variable_get("@#{prefix}grams") << wordphrase[iter...(iter+num)].join(" ")
@@ -52,10 +57,30 @@ module Stringfu
52
57
  # Calls :attr_accessor to add new instance variable
53
58
  self.class.__send__(:attr_accessor, "#{prefix}grams".to_sym)
54
59
  self.instance_variable_get("@#{prefix}grams")
60
+
61
+ self.class.__send__(:alias_method, "_#{num}grams", "#{prefix}grams") if num <= 3
62
+ self.class.__send__(:alias_method, "_#{num}grams=", "#{prefix}grams=") if num <= 3
55
63
  end
56
64
  args
57
65
  end
58
66
 
67
# Stems every word of the receiver.
# The text is passed through punc_strip (defined outside this hunk), then
# normalize, then split on whitespace; each word is handed to UEAStemmer#stem.
# Returns the array produced by the map block: one stemmer result per word.
# NOTE(review): the `words =` reassignment and the `stem =` local below are
# never read afterwards; only their values as block/method results matter.
+ def stem
68
+ words = self.punc_strip.normalize.split
69
+
70
+ stemmer = UEAStemmer.new
71
+ words = words.map do |word|
72
+ stem = stemmer.stem word
73
+ end
74
+ end
75
+
76
# Returns a new string built from the receiver: lowercased, with runs of
# consecutive spaces collapsed to one space, and leading/trailing whitespace
# removed. The receiver itself is not modified.
+ def normalize
77
+ self.downcase.squeeze(" ").strip
78
+ end
79
+
80
# In-place counterpart of normalize: replaces the receiver's contents with the
# lowercased, space-squeezed, stripped text.
# Returns the receiver (String#replace returns self), even if nothing changed.
+ def normalize!
81
+ replace(self.downcase.squeeze(" ").strip)
82
+ end
83
+
59
84
  private
60
85
  def latinfy num
61
86
  prefix = {1 => "uni", 2 => "bi", 3 => "tri"}
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stringfu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -20,6 +20,7 @@ extra_rdoc_files: []
20
20
  files:
21
21
  - .gitignore
22
22
  - Gemfile
23
+ - Gemfile.lock
23
24
  - README.markdown
24
25
  - Rakefile
25
26
  - lib/stringfu.rb