stringfu 0.0.6 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +0 -0
- data/Gemfile +0 -0
- data/README.markdown +41 -10
- data/Rakefile +0 -0
- data/lib/stringfu.rb +4 -4
- data/lib/stringfu/version.rb +1 -1
- data/stringfu.gemspec +1 -1
- metadata +6 -8
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
ODk5OGY2ZmJkNDA0MmRiOTMyMWU3MGZmMzVmNDg0M2M4MDdlNzAyNA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MjVlZTA1ZTI5MmE4YTY2YTZiMzY3NjliMjY5Nzg4YTQ3OGZhOGQ1Yw==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
YjZkZTA3ZWNiNWIxN2ZiMDIxYTc3NWZkODJiNDcxYjE0ZmExM2IzNjk1OWQ4
|
10
|
+
NDI2OWM3NTZkYTJiZWNlYmJjNmM3ZjBmZmMxZTFlY2IzNWNlNzAzZTBlNzM5
|
11
|
+
YzA4OWU3ZTY0ODNkOTI2ZWQyOWQ2MzhlOTM4NTQ0MmYxYjIzMzM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
MTEyYWRmNmZlNjRhYzUwMzZjMmQyMWY0ODk2Yjg0ZTk0ODliZDY5ODNkNmRj
|
14
|
+
ZTY0M2VkMDI3OGIwYmFkNTNlZTgwNDMzNDNhOGQ1Y2VmODVhYTM5ZDBlODZm
|
15
|
+
OTRmOGU1OTNlYTRhZThjYjhkMDgyNjM1YzE0Nzk2Y2NlNjQ5NjA=
|
data/.gitignore
CHANGED
File without changes
|
data/Gemfile
CHANGED
File without changes
|
data/README.markdown
CHANGED
@@ -1,11 +1,17 @@
|
|
1
|
-
#
|
1
|
+
# stringfu
|
2
2
|
|
3
|
-
|
3
|
+
stringfu is a Ruby gem with some simple methods for manipulating strings so that they can be cleaned up for Natural Language Processing (NLP).
|
4
4
|
|
5
|
-
## Install
|
5
|
+
## Install stringfu
|
6
6
|
|
7
7
|
gem install stringfu
|
8
8
|
|
9
|
+
### Dependencies
|
10
|
+
|
11
|
+
UEA Stemmer for Porter Stemming: https://github.com/ealdent/uea-stemmer
|
12
|
+
|
13
|
+
gem install uea-stemmer
|
14
|
+
|
9
15
|
## Usage
|
10
16
|
|
11
17
|
wtf = 'Charlie the Unicorn is a potty mouth. He\'ll say things like, @!@#% !@$%$[@#$^!)'
|
@@ -22,15 +28,40 @@ punc_gsub can also take arguments
|
|
22
28
|
|
23
29
|
wtf.punc_gsub "?" # => "Charlie the Unicorn is a potty mouth? He'll say things like? ????? ????????????"
|
24
30
|
|
25
|
-
ngrams will generate ngrams for any string and returns
|
31
|
+
ngrams will generate ngrams (unigrams, bigrams, trigrams, four-grams, etc.) for any string and return the max number of ngrams.
|
26
32
|
|
27
33
|
ftw = "I choose Whoppie Goldberg for the Win!"
|
28
|
-
ftw.ngrams # =>
|
34
|
+
ftw.ngrams # => 7
|
35
|
+
|
36
|
+
ftw.unigrams
|
37
|
+
ftw._1grams
|
38
|
+
# => ["I", "choose", "Whoppie", "Goldberg", "for", "the", "Win!"]
|
39
|
+
|
40
|
+
ftw.bigrams
|
41
|
+
ftw._2grams
|
42
|
+
# => ["I choose", "choose Whoppie", "Whoppie Goldberg", "Goldberg for", "for the", "the Win!"]
|
29
43
|
|
30
|
-
ftw.
|
31
|
-
ftw.
|
32
|
-
|
33
|
-
|
44
|
+
ftw.trigrams
|
45
|
+
ftw._3grams
|
46
|
+
# => ["I choose Whoppie", "choose Whoppie Goldberg", "Whoppie Goldberg for", "Goldberg for the", "for the Win!"]
|
47
|
+
|
48
|
+
ftw._4grams
|
49
|
+
# => ["I choose Whoppie Goldberg", "choose Whoppie Goldberg for", "Whoppie Goldberg for the", "Goldberg for the Win!"]
|
34
50
|
|
35
51
|
ftw.ngrams 3..6 # => [3, 4, 5, 6]
|
36
|
-
ftw.ngrams 2, 7,
|
52
|
+
ftw.ngrams 2, 7, 2..4 # => [2, 3, 4, 7]
|
53
|
+
|
54
|
+
Stemming a string will output an array of the Porter-stemmed words.
|
55
|
+
|
56
|
+
"ZOMG! I hearted installing headlights into used decepticons".stem
|
57
|
+
# => ["zomg", "i", "heart", "instal", "headlight", "into", "use", "decepticon"]
|
58
|
+
|
59
|
+
## Todo
|
60
|
+
|
61
|
+
* ngrams
|
62
|
+
* add option to make punctuation its own -gram
|
63
|
+
* add option to -gramify individual words into characters / syllables
|
64
|
+
* add parts of speech (POS) integration to drop types (noun, adj, verbs) from ngrams
|
65
|
+
* stemming
|
66
|
+
* add pluralize and singularize methods
|
67
|
+
* add parts of speech (POS) integration so you can pluralize and singularize just nouns (NN, NNS, NNP)
|
data/Rakefile
CHANGED
File without changes
|
data/lib/stringfu.rb
CHANGED
@@ -41,7 +41,7 @@ module Stringfu
|
|
41
41
|
if args.size == 0
|
42
42
|
args = [1..wordphrase.length]
|
43
43
|
end
|
44
|
-
|
44
|
+
|
45
45
|
# Inspired by http://www.rubyquiz.com/quiz4.html
|
46
46
|
args = args.map { |arg| Array(arg) }.flatten.uniq.sort
|
47
47
|
args = args.select { |arg| arg <= wordphrase.length and arg > 0 }
|
@@ -50,10 +50,10 @@ module Stringfu
|
|
50
50
|
prefix = latinfy num
|
51
51
|
self.instance_variable_set "@#{prefix}grams", []
|
52
52
|
|
53
|
-
|
54
|
-
|
55
|
-
self.instance_variable_get("@#{prefix}grams") << wordphrase[iter...(iter+num)].join(" ")
|
53
|
+
wordphrase.each_cons(num) do |words|
|
54
|
+
self.instance_variable_get("@#{prefix}grams") << words.join(" ")
|
56
55
|
end
|
56
|
+
|
57
57
|
# Calls :attr_accessor to add new instance variable
|
58
58
|
self.class.__send__(:attr_accessor, "#{prefix}grams".to_sym)
|
59
59
|
self.instance_variable_get("@#{prefix}grams")
|
data/lib/stringfu/version.rb
CHANGED
data/stringfu.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |s|
|
|
9
9
|
s.email = ["me@eywu.com"]
|
10
10
|
s.homepage = "http://stringfu.com"
|
11
11
|
s.summary = %q{Manipulating words like a grandmasta}
|
12
|
-
s.description = %q{Extending String with methods to stem, count, and clean words}
|
12
|
+
s.description = %q{Extending the String Class with methods to stem, count, and clean words}
|
13
13
|
|
14
14
|
s.rubyforge_project = "stringfu"
|
15
15
|
|
metadata
CHANGED
@@ -1,17 +1,16 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stringfu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.0.8
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Eric Wu
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2013-09-30 00:00:00.000000000 Z
|
13
12
|
dependencies: []
|
14
|
-
description: Extending String with methods to stem, count, and clean words
|
13
|
+
description: Extending the String Class with methods to stem, count, and clean words
|
15
14
|
email:
|
16
15
|
- me@eywu.com
|
17
16
|
executables: []
|
@@ -28,26 +27,25 @@ files:
|
|
28
27
|
- stringfu.gemspec
|
29
28
|
homepage: http://stringfu.com
|
30
29
|
licenses: []
|
30
|
+
metadata: {}
|
31
31
|
post_install_message:
|
32
32
|
rdoc_options: []
|
33
33
|
require_paths:
|
34
34
|
- lib
|
35
35
|
required_ruby_version: !ruby/object:Gem::Requirement
|
36
|
-
none: false
|
37
36
|
requirements:
|
38
37
|
- - ! '>='
|
39
38
|
- !ruby/object:Gem::Version
|
40
39
|
version: '0'
|
41
40
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
42
|
-
none: false
|
43
41
|
requirements:
|
44
42
|
- - ! '>='
|
45
43
|
- !ruby/object:Gem::Version
|
46
44
|
version: '0'
|
47
45
|
requirements: []
|
48
46
|
rubyforge_project: stringfu
|
49
|
-
rubygems_version: 1.
|
47
|
+
rubygems_version: 2.1.5
|
50
48
|
signing_key:
|
51
|
-
specification_version:
|
49
|
+
specification_version: 4
|
52
50
|
summary: Manipulating words like a grandmasta
|
53
51
|
test_files: []
|