stringfu 0.0.6 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +0 -0
- data/Gemfile +0 -0
- data/README.markdown +41 -10
- data/Rakefile +0 -0
- data/lib/stringfu.rb +4 -4
- data/lib/stringfu/version.rb +1 -1
- data/stringfu.gemspec +1 -1
- metadata +6 -8
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
ODk5OGY2ZmJkNDA0MmRiOTMyMWU3MGZmMzVmNDg0M2M4MDdlNzAyNA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MjVlZTA1ZTI5MmE4YTY2YTZiMzY3NjliMjY5Nzg4YTQ3OGZhOGQ1Yw==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
YjZkZTA3ZWNiNWIxN2ZiMDIxYTc3NWZkODJiNDcxYjE0ZmExM2IzNjk1OWQ4
|
10
|
+
NDI2OWM3NTZkYTJiZWNlYmJjNmM3ZjBmZmMxZTFlY2IzNWNlNzAzZTBlNzM5
|
11
|
+
YzA4OWU3ZTY0ODNkOTI2ZWQyOWQ2MzhlOTM4NTQ0MmYxYjIzMzM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
MTEyYWRmNmZlNjRhYzUwMzZjMmQyMWY0ODk2Yjg0ZTk0ODliZDY5ODNkNmRj
|
14
|
+
ZTY0M2VkMDI3OGIwYmFkNTNlZTgwNDMzNDNhOGQ1Y2VmODVhYTM5ZDBlODZm
|
15
|
+
OTRmOGU1OTNlYTRhZThjYjhkMDgyNjM1YzE0Nzk2Y2NlNjQ5NjA=
|
data/.gitignore
CHANGED
File without changes
|
data/Gemfile
CHANGED
File without changes
|
data/README.markdown
CHANGED
@@ -1,11 +1,17 @@
|
|
1
|
-
#
|
1
|
+
# stringfu
|
2
2
|
|
3
|
-
|
3
|
+
stringfu is a Ruby gem with some simple methods for manipulating strings so that they can be cleaned up for Natural Language Processing (NLP).
|
4
4
|
|
5
|
-
## Install
|
5
|
+
## Install stringfu
|
6
6
|
|
7
7
|
gem install stringfu
|
8
8
|
|
9
|
+
### Dependencies
|
10
|
+
|
11
|
+
UEA Stemmer for Porter Stemming: https://github.com/ealdent/uea-stemmer
|
12
|
+
|
13
|
+
gem install uea-stemmer
|
14
|
+
|
9
15
|
## Usage
|
10
16
|
|
11
17
|
wtf = 'Charlie the Unicorn is a potty mouth. He\'ll say things like, @!@#% !@$%$[@#$^!)'
|
@@ -22,15 +28,40 @@ punc_gsub can also take arguments
|
|
22
28
|
|
23
29
|
wtf.punc_gsub "?" # => "Charlie the Unicorn is a potty mouth? He'll say things like? ????? ????????????"
|
24
30
|
|
25
|
-
ngrams will generate ngrams for any string and returns
|
31
|
+
ngrams will generate ngrams (unigrams, bigrams, trigrams, four-grams, etc.) for any string and return the max number of ngrams.
|
26
32
|
|
27
33
|
ftw = "I choose Whoppie Goldberg for the Win!"
|
28
|
-
ftw.ngrams # =>
|
34
|
+
ftw.ngrams # => 7
|
35
|
+
|
36
|
+
ftw.unigrams
|
37
|
+
ftw._1grams
|
38
|
+
# => ["I", "choose", "Whoppie", "Goldberg", "for", "the", "Win!"]
|
39
|
+
|
40
|
+
ftw.bigrams
|
41
|
+
ftw._2grams
|
42
|
+
# => ["I choose", "choose Whoppie", "Whoppie Goldberg", "Goldberg for", "for the", "the Win!"]
|
29
43
|
|
30
|
-
ftw.
|
31
|
-
ftw.
|
32
|
-
|
33
|
-
|
44
|
+
ftw.trigrams
|
45
|
+
ftw._3grams
|
46
|
+
# => ["I choose Whoppie", "choose Whoppie Goldberg", "Whoppie Goldberg for", "Goldberg for the", "for the Win!"]
|
47
|
+
|
48
|
+
ftw._4grams
|
49
|
+
# => ["I choose Whoppie Goldberg", "choose Whoppie Goldberg for", "Whoppie Goldberg for the", "Goldberg for the Win!"]
|
34
50
|
|
35
51
|
ftw.ngrams 3..6 # => [3, 4, 5, 6]
|
36
|
-
ftw.ngrams 2, 7,
|
52
|
+
ftw.ngrams 2, 7, 2..4 # => [2, 3, 4, 7]
|
53
|
+
|
54
|
+
Stemming a string will output an array of the Porter-stemmed words.
|
55
|
+
|
56
|
+
"ZOMG! I hearted installing headlights into used decepticons".stem
|
57
|
+
# => ["zomg", "i", "heart", "instal", "headlight", "into", "use", "decepticon"]
|
58
|
+
|
59
|
+
## Todo
|
60
|
+
|
61
|
+
* ngrams
|
62
|
+
* add option to make punctuation its own -gram
|
63
|
+
* add option to -gramify individual words into characters / syllables
|
64
|
+
* add parts of speech (POS) integration to drop types (noun, adj, verbs) from ngrams
|
65
|
+
* stemming
|
66
|
+
* add pluralize and singularize methods
|
67
|
+
* add parts of speech (POS) integration so you can pluralize and singularize just nouns (NN, NNS, NNP)
|
data/Rakefile
CHANGED
File without changes
|
data/lib/stringfu.rb
CHANGED
@@ -41,7 +41,7 @@ module Stringfu
|
|
41
41
|
if args.size == 0
|
42
42
|
args = [1..wordphrase.length]
|
43
43
|
end
|
44
|
-
|
44
|
+
|
45
45
|
# Inspired by http://www.rubyquiz.com/quiz4.html
|
46
46
|
args = args.map { |arg| Array(arg) }.flatten.uniq.sort
|
47
47
|
args = args.select { |arg| arg <= wordphrase.length and arg > 0 }
|
@@ -50,10 +50,10 @@ module Stringfu
|
|
50
50
|
prefix = latinfy num
|
51
51
|
self.instance_variable_set "@#{prefix}grams", []
|
52
52
|
|
53
|
-
|
54
|
-
|
55
|
-
self.instance_variable_get("@#{prefix}grams") << wordphrase[iter...(iter+num)].join(" ")
|
53
|
+
wordphrase.each_cons(num) do |words|
|
54
|
+
self.instance_variable_get("@#{prefix}grams") << words.join(" ")
|
56
55
|
end
|
56
|
+
|
57
57
|
# Calls :attr_accessor to add new instance variable
|
58
58
|
self.class.__send__(:attr_accessor, "#{prefix}grams".to_sym)
|
59
59
|
self.instance_variable_get("@#{prefix}grams")
|
data/lib/stringfu/version.rb
CHANGED
data/stringfu.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |s|
|
|
9
9
|
s.email = ["me@eywu.com"]
|
10
10
|
s.homepage = "http://stringfu.com"
|
11
11
|
s.summary = %q{Manipulating words like a grandmasta}
|
12
|
-
s.description = %q{Extending String with methods to stem, count, and clean words}
|
12
|
+
s.description = %q{Extending the String Class with methods to stem, count, and clean words}
|
13
13
|
|
14
14
|
s.rubyforge_project = "stringfu"
|
15
15
|
|
metadata
CHANGED
@@ -1,17 +1,16 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stringfu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.0.8
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Eric Wu
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2013-09-30 00:00:00.000000000 Z
|
13
12
|
dependencies: []
|
14
|
-
description: Extending String with methods to stem, count, and clean words
|
13
|
+
description: Extending the String Class with methods to stem, count, and clean words
|
15
14
|
email:
|
16
15
|
- me@eywu.com
|
17
16
|
executables: []
|
@@ -28,26 +27,25 @@ files:
|
|
28
27
|
- stringfu.gemspec
|
29
28
|
homepage: http://stringfu.com
|
30
29
|
licenses: []
|
30
|
+
metadata: {}
|
31
31
|
post_install_message:
|
32
32
|
rdoc_options: []
|
33
33
|
require_paths:
|
34
34
|
- lib
|
35
35
|
required_ruby_version: !ruby/object:Gem::Requirement
|
36
|
-
none: false
|
37
36
|
requirements:
|
38
37
|
- - ! '>='
|
39
38
|
- !ruby/object:Gem::Version
|
40
39
|
version: '0'
|
41
40
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
42
|
-
none: false
|
43
41
|
requirements:
|
44
42
|
- - ! '>='
|
45
43
|
- !ruby/object:Gem::Version
|
46
44
|
version: '0'
|
47
45
|
requirements: []
|
48
46
|
rubyforge_project: stringfu
|
49
|
-
rubygems_version: 1.
|
47
|
+
rubygems_version: 2.1.5
|
50
48
|
signing_key:
|
51
|
-
specification_version:
|
49
|
+
specification_version: 4
|
52
50
|
summary: Manipulating words like a grandmasta
|
53
51
|
test_files: []
|