stringfu 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -2,3 +2,5 @@ source "http://rubygems.org"
2
2
 
3
3
  # Specify your gem's dependencies in stringfu.gemspec
4
4
  gemspec
5
+ gem "uea-stemmer"
6
+
data/Gemfile.lock ADDED
@@ -0,0 +1,16 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ stringfu (0.0.3)
5
+
6
+ GEM
7
+ remote: http://rubygems.org/
8
+ specs:
9
+ uea-stemmer (0.10.1)
10
+
11
+ PLATFORMS
12
+ ruby
13
+
14
+ DEPENDENCIES
15
+ stringfu!
16
+ uea-stemmer
data/README.markdown CHANGED
@@ -1,3 +1,36 @@
1
1
  # StringFu
2
2
 
3
3
  StringFu is a set of simple methods for manipulating strings so that they can be cleaned up for Natural Language Processing (NLP).
4
+
5
+ ## Install StringFu
6
+
7
+ gem install stringfu
8
+
9
+ ## Usage
10
+
11
+ wtf = 'Charlie the Unicorn is a potty mouth. He\'ll say things like, @!@#% !@$%$[@#$^!)'
12
+
13
+ punc_strip will strip away all standard punctuation.
14
+
15
+ wtf.punc_strip # => "Charlie the Unicorn is a potty mouth He'll say things like "
16
+
17
+ punc_gsub will replace punctuation with spaces if given no arguments.
18
+
19
+ wtf.punc_gsub # => "Charlie the Unicorn is a potty mouth He'll say things like "
20
+
21
+ punc_gsub can also take arguments
22
+
23
+ wtf.punc_gsub "?" # => "Charlie the Unicorn is a potty mouth? He'll say things like? ????? ????????????"
24
+
25
+ ngrams will generate ngrams for any string and return an array of numbers corresponding to the ngrams that were generated
26
+
27
+ ftw = "I choose Whoppie Goldberg for the Win!"
28
+ ftw.ngrams # => [1, 2, 3, 4, 5, 6, 7]
29
+
30
+ ftw.unigrams # => ["I", "choose", "Whoppie", "Goldberg", "for", "the", "Win!"]
31
+ ftw.bigrams # => ["I choose", "choose Whoppie", "Whoppie Goldberg", "Goldberg for", "for the", "the Win!"]
32
+ ftw.trigrams # => ["I choose Whoppie", "choose Whoppie Goldberg", "Whoppie Goldberg for", "Goldberg for the", "for the Win!"]
33
+ ftw._4grams # => ["I choose Whoppie Goldberg", "choose Whoppie Goldberg for", "Whoppie Goldberg for the", "Goldberg for the Win!"]
34
+
35
+ ftw.ngrams 3..6 # => [3, 4, 5, 6]
36
+ ftw.ngrams 2, 7, 3..4 # => [2, 3, 4, 7]
@@ -1,3 +1,3 @@
1
1
  module Stringfu
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
data/lib/stringfu.rb CHANGED
@@ -1,4 +1,8 @@
1
- require "stringfu/version"
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+
4
+ require 'stringfu/version'
5
+ require 'uea-stemmer'
2
6
 
3
7
  STOP = ["a", "able", "about", "across", "after", "all", "almost", "also", "am", "among", "an", "and", "any", "are", "as", "at", "be", "because", "been", "but", "by", "can", "cannot", "could", "dear", "did", "do", "does", "either", "else", "ever", "every", "for", "from", "get", "got", "had", "has", "have", "he", "her", "hers", "him", "his", "how", "however", "i", "if", "in", "into", "is", "it", "its", "just", "least", "let", "like", "likely", "may", "me", "might", "most", "must", "my", "neither", "no", "nor", "not", "of", "off", "often", "on", "only", "or", "other", "our", "own", "rather", "said", "say", "says", "she", "should", "since", "so", "some", "than", "that", "the", "their", "them", "then", "there", "these", "they", "this", "tis", "to", "too", "twas", "us", "wants", "was", "we", "were", "what", "when", "where", "which", "while", "who", "whom", "why", "will", "with", "would", "yet", "you", "your"]
4
8
 
@@ -45,6 +49,7 @@ module Stringfu
45
49
  args.each do |num|
46
50
  prefix = latinfy num
47
51
  self.instance_variable_set "@#{prefix}grams", []
52
+
48
53
  rounds = wordphrase.length - num + 1
49
54
  rounds.times do |iter|
50
55
  self.instance_variable_get("@#{prefix}grams") << wordphrase[iter...(iter+num)].join(" ")
@@ -52,10 +57,30 @@ module Stringfu
52
57
  # Calls :attr_accessor to add new instance variable
53
58
  self.class.__send__(:attr_accessor, "#{prefix}grams".to_sym)
54
59
  self.instance_variable_get("@#{prefix}grams")
60
+
61
+ self.class.__send__(:alias_method, "_#{num}grams", "#{prefix}grams") if num <= 3
62
+ self.class.__send__(:alias_method, "_#{num}grams=", "#{prefix}grams=") if num <= 3
55
63
  end
56
64
  args
57
65
  end
58
66
 
67
+ def stem
68
+ words = self.punc_strip.normalize.split
69
+
70
+ stemmer = UEAStemmer.new
71
+ words = words.map do |word|
72
+ stem = stemmer.stem word
73
+ end
74
+ end
75
+
76
+ def normalize
77
+ self.downcase.squeeze(" ").strip
78
+ end
79
+
80
+ def normalize!
81
+ replace(self.downcase.squeeze(" ").strip)
82
+ end
83
+
59
84
  private
60
85
  def latinfy num
61
86
  prefix = {1 => "uni", 2 => "bi", 3 => "tri"}
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stringfu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -20,6 +20,7 @@ extra_rdoc_files: []
20
20
  files:
21
21
  - .gitignore
22
22
  - Gemfile
23
+ - Gemfile.lock
23
24
  - README.markdown
24
25
  - Rakefile
25
26
  - lib/stringfu.rb