stringfu 0.0.6 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ODk5OGY2ZmJkNDA0MmRiOTMyMWU3MGZmMzVmNDg0M2M4MDdlNzAyNA==
5
+ data.tar.gz: !binary |-
6
+ MjVlZTA1ZTI5MmE4YTY2YTZiMzY3NjliMjY5Nzg4YTQ3OGZhOGQ1Yw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ YjZkZTA3ZWNiNWIxN2ZiMDIxYTc3NWZkODJiNDcxYjE0ZmExM2IzNjk1OWQ4
10
+ NDI2OWM3NTZkYTJiZWNlYmJjNmM3ZjBmZmMxZTFlY2IzNWNlNzAzZTBlNzM5
11
+ YzA4OWU3ZTY0ODNkOTI2ZWQyOWQ2MzhlOTM4NTQ0MmYxYjIzMzM=
12
+ data.tar.gz: !binary |-
13
+ MTEyYWRmNmZlNjRhYzUwMzZjMmQyMWY0ODk2Yjg0ZTk0ODliZDY5ODNkNmRj
14
+ ZTY0M2VkMDI3OGIwYmFkNTNlZTgwNDMzNDNhOGQ1Y2VmODVhYTM5ZDBlODZm
15
+ OTRmOGU1OTNlYTRhZThjYjhkMDgyNjM1YzE0Nzk2Y2NlNjQ5NjA=
data/.gitignore CHANGED
File without changes
data/Gemfile CHANGED
File without changes
data/README.markdown CHANGED
@@ -1,11 +1,17 @@
1
- # StringFu
1
+ # stringfu
2
2
 
3
- StringFu are some simple methods to manipulate strings in order for them to be cleaned up for Natural Language Processing (NLP).
3
+ stringfu is a ruby gem with some simple methods to manipulate strings in order for them to be cleaned up for Natural Language Processing (NLP).
4
4
 
5
- ## Install StringFu
5
+ ## Install stringfu
6
6
 
7
7
  gem install stringfu
8
8
 
9
+ ### Dependencies
10
+
11
+ UEA Stemmer for Porter Stemming: https://github.com/ealdent/uea-stemmer
12
+
13
+ gem install uea-stemmer
14
+
9
15
  ## Usage
10
16
 
11
17
  wtf = 'Charlie the Unicorn is a potty mouth. He\'ll say things like, @!@#% !@$%$[@#$^!)'
@@ -22,15 +28,40 @@ punc_gsub can also take arguments
22
28
 
23
29
  wtf.punc_gsub "?" # => "Charlie the Unicorn is a potty mouth? He'll say things like? ????? ????????????"
24
30
 
25
- ngrams will generate ngrams for any string and returns an array of numbers corresponding to the ngrams
31
+ ngrams generates ngrams (unigrams, bigrams, trigrams, four-grams, etc.) for any string and returns the max number of ngrams.
26
32
 
27
33
  ftw = "I choose Whoppie Goldberg for the Win!"
28
- ftw.ngrams # => [1, 2, 3, 4, 5, 6, 7]
34
+ ftw.ngrams # => 7
35
+
36
+ ftw.unigrams
37
+ ftw._1grams
38
+ # => ["I", "choose", "Whoppie", "Goldberg", "for", "the", "Win!"]
39
+
40
+ ftw.bigrams
41
+ ftw._2grams
42
+ # => ["I choose", "choose Whoppie", "Whoppie Goldberg", "Goldberg for", "for the", "the Win!"]
29
43
 
30
- ftw.unigrams # => ["I", "choose", "Whoppie", "Goldberg", "for", "the", "Win!"]
31
- ftw.bigrams # => ["I choose", "choose Whoppie", "Whoppie Goldberg", "Goldberg for", "for the", "the Win!"]
32
- ftw.trigrams # => ["I choose Whoppie", "choose Whoppie Goldberg", "Whoppie Goldberg for", "Goldberg for the", "for the Win!"]
33
- ftw._4grams # => ["I choose Whoppie Goldberg", "choose Whoppie Goldberg for", "Whoppie Goldberg for the", "Goldberg for the Win!"]
44
+ ftw.trigrams
45
+ ftw._3grams
46
+ # => ["I choose Whoppie", "choose Whoppie Goldberg", "Whoppie Goldberg for", "Goldberg for the", "for the Win!"]
47
+
48
+ ftw._4grams
49
+ # => ["I choose Whoppie Goldberg", "choose Whoppie Goldberg for", "Whoppie Goldberg for the", "Goldberg for the Win!"]
34
50
 
35
51
  ftw.ngrams 3..6 # => [3, 4, 5, 6]
36
- ftw.ngrams 2, 7, 3..4 # => [2, 3, 4, 7]
52
+ ftw.ngrams 2, 7, 2..4 # => [2, 3, 4, 7]
53
+
54
+ stemming a string will output an array of the Porter Stemmed words
55
+
56
+ "ZOMG! I hearted installing headlights into used decepticons".stem
57
+ # => ["zomg", "i", "heart", "instal", "headlight", "into", "use", "decepticon"]
58
+
59
+ ## Todo
60
+
61
+ * ngrams
62
+ * add option to make punctuation its own -gram
63
+ * add option to -gramify individual words into characters / syllables
64
+ * add parts of speech (POS) integration to drop types (noun, adj, verbs) from ngrams
65
+ * stemming
66
+ * add pluralize and singularize methods
67
+ * add parts of speech (POS) integration so you can pluralize and singularize just nouns (NN, NNS, NNP)
data/Rakefile CHANGED
File without changes
data/lib/stringfu.rb CHANGED
@@ -41,7 +41,7 @@ module Stringfu
41
41
  if args.size == 0
42
42
  args = [1..wordphrase.length]
43
43
  end
44
-
44
+
45
45
  # Inspired by http://www.rubyquiz.com/quiz4.html
46
46
  args = args.map { |arg| Array(arg) }.flatten.uniq.sort
47
47
  args = args.select { |arg| arg <= wordphrase.length and arg > 0 }
@@ -50,10 +50,10 @@ module Stringfu
50
50
  prefix = latinfy num
51
51
  self.instance_variable_set "@#{prefix}grams", []
52
52
 
53
- rounds = wordphrase.length - num + 1
54
- rounds.times do |iter|
55
- self.instance_variable_get("@#{prefix}grams") << wordphrase[iter...(iter+num)].join(" ")
53
+ wordphrase.each_cons(num) do |words|
54
+ self.instance_variable_get("@#{prefix}grams") << words.join(" ")
56
55
  end
56
+
57
57
  # Calls :attr_accessor to add new instance variable
58
58
  self.class.__send__(:attr_accessor, "#{prefix}grams".to_sym)
59
59
  self.instance_variable_get("@#{prefix}grams")
@@ -1,3 +1,3 @@
1
1
  module Stringfu
2
- VERSION = "0.0.6"
2
+ VERSION = "0.0.8"
3
3
  end
data/stringfu.gemspec CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |s|
9
9
  s.email = ["me@eywu.com"]
10
10
  s.homepage = "http://stringfu.com"
11
11
  s.summary = %q{Manipulating words like a grandmasta}
12
- s.description = %q{Extending String with methods to stem, count, and clean words}
12
+ s.description = %q{Extending the String Class with methods to stem, count, and clean words}
13
13
 
14
14
  s.rubyforge_project = "stringfu"
15
15
 
metadata CHANGED
@@ -1,17 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stringfu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
5
- prerelease:
4
+ version: 0.0.8
6
5
  platform: ruby
7
6
  authors:
8
7
  - Eric Wu
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2011-09-20 00:00:00.000000000Z
11
+ date: 2013-09-30 00:00:00.000000000 Z
13
12
  dependencies: []
14
- description: Extending String with methods to stem, count, and clean words
13
+ description: Extending the String Class with methods to stem, count, and clean words
15
14
  email:
16
15
  - me@eywu.com
17
16
  executables: []
@@ -28,26 +27,25 @@ files:
28
27
  - stringfu.gemspec
29
28
  homepage: http://stringfu.com
30
29
  licenses: []
30
+ metadata: {}
31
31
  post_install_message:
32
32
  rdoc_options: []
33
33
  require_paths:
34
34
  - lib
35
35
  required_ruby_version: !ruby/object:Gem::Requirement
36
- none: false
37
36
  requirements:
38
37
  - - ! '>='
39
38
  - !ruby/object:Gem::Version
40
39
  version: '0'
41
40
  required_rubygems_version: !ruby/object:Gem::Requirement
42
- none: false
43
41
  requirements:
44
42
  - - ! '>='
45
43
  - !ruby/object:Gem::Version
46
44
  version: '0'
47
45
  requirements: []
48
46
  rubyforge_project: stringfu
49
- rubygems_version: 1.8.10
47
+ rubygems_version: 2.1.5
50
48
  signing_key:
51
- specification_version: 3
49
+ specification_version: 4
52
50
  summary: Manipulating words like a grandmasta
53
51
  test_files: []