stringfu 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ODk5OGY2ZmJkNDA0MmRiOTMyMWU3MGZmMzVmNDg0M2M4MDdlNzAyNA==
5
+ data.tar.gz: !binary |-
6
+ MjVlZTA1ZTI5MmE4YTY2YTZiMzY3NjliMjY5Nzg4YTQ3OGZhOGQ1Yw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ YjZkZTA3ZWNiNWIxN2ZiMDIxYTc3NWZkODJiNDcxYjE0ZmExM2IzNjk1OWQ4
10
+ NDI2OWM3NTZkYTJiZWNlYmJjNmM3ZjBmZmMxZTFlY2IzNWNlNzAzZTBlNzM5
11
+ YzA4OWU3ZTY0ODNkOTI2ZWQyOWQ2MzhlOTM4NTQ0MmYxYjIzMzM=
12
+ data.tar.gz: !binary |-
13
+ MTEyYWRmNmZlNjRhYzUwMzZjMmQyMWY0ODk2Yjg0ZTk0ODliZDY5ODNkNmRj
14
+ ZTY0M2VkMDI3OGIwYmFkNTNlZTgwNDMzNDNhOGQ1Y2VmODVhYTM5ZDBlODZm
15
+ OTRmOGU1OTNlYTRhZThjYjhkMDgyNjM1YzE0Nzk2Y2NlNjQ5NjA=
data/.gitignore CHANGED
File without changes
data/Gemfile CHANGED
File without changes
data/README.markdown CHANGED
@@ -1,11 +1,17 @@
1
- # StringFu
1
+ # stringfu
2
2
 
3
- StringFu are some simple methods to manipulate strings in order for them to be cleaned up for Natural Language Processing (NLP).
3
+ stringfu is a ruby gem with some simple methods to manipulate strings in order for them to be cleaned up for Natural Language Processing (NLP).
4
4
 
5
- ## Install StringFu
5
+ ## Install stringfu
6
6
 
7
7
  gem install stringfu
8
8
 
9
+ ### Dependencies
10
+
11
+ UEA Stemmer for Porter Stemming: https://github.com/ealdent/uea-stemmer
12
+
13
+ gem install uea-stemmer
14
+
9
15
  ## Usage
10
16
 
11
17
  wtf = 'Charlie the Unicorn is a potty mouth. He\'ll say things like, @!@#% !@$%$[@#$^!)'
@@ -22,15 +28,40 @@ punc_gsub can also take arguments
22
28
 
23
29
  wtf.punc_gsub "?" # => "Charlie the Unicorn is a potty mouth? He'll say things like? ????? ????????????"
24
30
 
25
- ngrams will generate ngrams for any string and returns an array of numbers corresponding to the ngrams
31
+ ngrams will generate ngrams (unigrams, bigrams, trigrams, four-grams, etc.) for any string and return the max number of ngrams.
26
32
 
27
33
  ftw = "I choose Whoppie Goldberg for the Win!"
28
- ftw.ngrams # => [1, 2, 3, 4, 5, 6, 7]
34
+ ftw.ngrams # => 7
35
+
36
+ ftw.unigrams
37
+ ftw._1grams
38
+ # => ["I", "choose", "Whoppie", "Goldberg", "for", "the", "Win!"]
39
+
40
+ ftw.bigrams
41
+ ftw._2grams
42
+ # => ["I choose", "choose Whoppie", "Whoppie Goldberg", "Goldberg for", "for the", "the Win!"]
29
43
 
30
- ftw.unigrams # => ["I", "choose", "Whoppie", "Goldberg", "for", "the", "Win!"]
31
- ftw.bigrams # => ["I choose", "choose Whoppie", "Whoppie Goldberg", "Goldberg for", "for the", "the Win!"]
32
- ftw.trigrams # => ["I choose Whoppie", "choose Whoppie Goldberg", "Whoppie Goldberg for", "Goldberg for the", "for the Win!"]
33
- ftw._4grams # => ["I choose Whoppie Goldberg", "choose Whoppie Goldberg for", "Whoppie Goldberg for the", "Goldberg for the Win!"]
44
+ ftw.trigrams
45
+ ftw._3grams
46
+ # => ["I choose Whoppie", "choose Whoppie Goldberg", "Whoppie Goldberg for", "Goldberg for the", "for the Win!"]
47
+
48
+ ftw._4grams
49
+ # => ["I choose Whoppie Goldberg", "choose Whoppie Goldberg for", "Whoppie Goldberg for the", "Goldberg for the Win!"]
34
50
 
35
51
  ftw.ngrams 3..6 # => [3, 4, 5, 6]
36
- ftw.ngrams 2, 7, 3..4 # => [2, 3, 4, 7]
52
+ ftw.ngrams 2, 7, 2..4 # => [2, 3, 4, 7]
53
+
54
+ stemming a string will output an array of the Porter Stemmed words
55
+
56
+ "ZOMG! I hearted installing headlights into used decepticons".stem
57
+ # => ["zomg", "i", "heart", "instal", "headlight", "into", "use", "decepticon"]
58
+
59
+ ## Todo
60
+
61
+ * ngrams
62
+ * add option to make punctuation its own -gram
63
+ * add option to -gramify individual words into characters / syllables
64
+ * add parts of speech (POS) integration to drop types (noun, adj, verbs) from ngrams
65
+ * stemming
66
+ * add pluralize and singularize methods
67
+ * add parts of speech (POS) integration so you can pluralize and singularize just nouns (NN, NNS, NNP)
data/Rakefile CHANGED
File without changes
data/lib/stringfu.rb CHANGED
@@ -41,7 +41,7 @@ module Stringfu
41
41
  if args.size == 0
42
42
  args = [1..wordphrase.length]
43
43
  end
44
-
44
+
45
45
  # Inspired by http://www.rubyquiz.com/quiz4.html
46
46
  args = args.map { |arg| Array(arg) }.flatten.uniq.sort
47
47
  args = args.select { |arg| arg <= wordphrase.length and arg > 0 }
@@ -50,10 +50,10 @@ module Stringfu
50
50
  prefix = latinfy num
51
51
  self.instance_variable_set "@#{prefix}grams", []
52
52
 
53
- rounds = wordphrase.length - num + 1
54
- rounds.times do |iter|
55
- self.instance_variable_get("@#{prefix}grams") << wordphrase[iter...(iter+num)].join(" ")
53
+ wordphrase.each_cons(num) do |words|
54
+ self.instance_variable_get("@#{prefix}grams") << words.join(" ")
56
55
  end
56
+
57
57
  # Calls :attr_accessor to add new instance variable
58
58
  self.class.__send__(:attr_accessor, "#{prefix}grams".to_sym)
59
59
  self.instance_variable_get("@#{prefix}grams")
@@ -1,3 +1,3 @@
1
1
  module Stringfu
2
- VERSION = "0.0.6"
2
+ VERSION = "0.0.8"
3
3
  end
data/stringfu.gemspec CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |s|
9
9
  s.email = ["me@eywu.com"]
10
10
  s.homepage = "http://stringfu.com"
11
11
  s.summary = %q{Manipulating words like a grandmasta}
12
- s.description = %q{Extending String with methods to stem, count, and clean words}
12
+ s.description = %q{Extending the String Class with methods to stem, count, and clean words}
13
13
 
14
14
  s.rubyforge_project = "stringfu"
15
15
 
metadata CHANGED
@@ -1,17 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stringfu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
5
- prerelease:
4
+ version: 0.0.8
6
5
  platform: ruby
7
6
  authors:
8
7
  - Eric Wu
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2011-09-20 00:00:00.000000000Z
11
+ date: 2013-09-30 00:00:00.000000000 Z
13
12
  dependencies: []
14
- description: Extending String with methods to stem, count, and clean words
13
+ description: Extending the String Class with methods to stem, count, and clean words
15
14
  email:
16
15
  - me@eywu.com
17
16
  executables: []
@@ -28,26 +27,25 @@ files:
28
27
  - stringfu.gemspec
29
28
  homepage: http://stringfu.com
30
29
  licenses: []
30
+ metadata: {}
31
31
  post_install_message:
32
32
  rdoc_options: []
33
33
  require_paths:
34
34
  - lib
35
35
  required_ruby_version: !ruby/object:Gem::Requirement
36
- none: false
37
36
  requirements:
38
37
  - - ! '>='
39
38
  - !ruby/object:Gem::Version
40
39
  version: '0'
41
40
  required_rubygems_version: !ruby/object:Gem::Requirement
42
- none: false
43
41
  requirements:
44
42
  - - ! '>='
45
43
  - !ruby/object:Gem::Version
46
44
  version: '0'
47
45
  requirements: []
48
46
  rubyforge_project: stringfu
49
- rubygems_version: 1.8.10
47
+ rubygems_version: 2.1.5
50
48
  signing_key:
51
- specification_version: 3
49
+ specification_version: 4
52
50
  summary: Manipulating words like a grandmasta
53
51
  test_files: []