woccur 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +2 -0
- data/lib/common.yml +114 -0
- data/lib/woccur.rb +45 -0
- data/lib/woccur/core_ext.rb +5 -0
- data/lib/woccur/version.rb +3 -0
- data/spec/woccur_spec.rb +13 -0
- data/woccur.gemspec +23 -0
- metadata +75 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Rakefile
ADDED
data/lib/common.yml
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
- a
|
2
|
+
- about
|
3
|
+
- after
|
4
|
+
- again
|
5
|
+
- against
|
6
|
+
- all
|
7
|
+
- an
|
8
|
+
- another
|
9
|
+
- any
|
10
|
+
- and
|
11
|
+
- are
|
12
|
+
- as
|
13
|
+
- at
|
14
|
+
- be
|
15
|
+
- being
|
16
|
+
- been
|
17
|
+
- before
|
18
|
+
- but
|
19
|
+
- by
|
20
|
+
- can
|
21
|
+
- could
|
22
|
+
- did
|
23
|
+
- do
|
24
|
+
- don't
|
25
|
+
- down
|
26
|
+
- each
|
27
|
+
- few
|
28
|
+
- from
|
29
|
+
- for
|
30
|
+
- get
|
31
|
+
- got
|
32
|
+
- great
|
33
|
+
- had
|
34
|
+
- has
|
35
|
+
- have
|
36
|
+
- he
|
37
|
+
- her
|
38
|
+
- here
|
39
|
+
- his
|
40
|
+
- him
|
41
|
+
- himself
|
42
|
+
- hers
|
43
|
+
- how
|
44
|
+
- i
|
45
|
+
- if
|
46
|
+
- i'm
|
47
|
+
- in
|
48
|
+
- into
|
49
|
+
- is
|
50
|
+
- it
|
51
|
+
- it's
|
52
|
+
- just
|
53
|
+
- like
|
54
|
+
- made
|
55
|
+
- me
|
56
|
+
- more
|
57
|
+
- most
|
58
|
+
- my
|
59
|
+
- no
|
60
|
+
- not
|
61
|
+
- of
|
62
|
+
- off
|
63
|
+
- on
|
64
|
+
- once
|
65
|
+
- one
|
66
|
+
- only
|
67
|
+
- or
|
68
|
+
- other
|
69
|
+
- our
|
70
|
+
- out
|
71
|
+
- over
|
72
|
+
- own
|
73
|
+
- said
|
74
|
+
- she
|
75
|
+
- should
|
76
|
+
- so
|
77
|
+
- some
|
78
|
+
- such
|
79
|
+
- than
|
80
|
+
- that
|
81
|
+
- the
|
82
|
+
- their
|
83
|
+
- them
|
84
|
+
- then
|
85
|
+
- there
|
86
|
+
- these
|
87
|
+
- they
|
88
|
+
- this
|
89
|
+
- those
|
90
|
+
- through
|
91
|
+
- to
|
92
|
+
- too
|
93
|
+
- under
|
94
|
+
- until
|
95
|
+
- up
|
96
|
+
- very
|
97
|
+
- was
|
98
|
+
- wasn't
|
99
|
+
- we
|
100
|
+
- were
|
101
|
+
- we're
|
102
|
+
- what
|
103
|
+
- when
|
104
|
+
- where
|
105
|
+
- which
|
106
|
+
- while
|
107
|
+
- who
|
108
|
+
- why
|
109
|
+
- will
|
110
|
+
- with
|
111
|
+
- would
|
112
|
+
- wouldn't
|
113
|
+
- you
|
114
|
+
- your
|
data/lib/woccur.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'woccur/version'
|
2
|
+
require 'woccur/core_ext'
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
# Word-occurrence counting: finds the most frequent non-trivial words in a text.
module Woccur
  # Stop words that are never reported by Woccur.common_words.
  # Built once and frozen instead of being re-allocated on every call.
  COMMON_WORDS = %w[
    a about after again against all an another any and are as at
    be being been before but by
    can could
    did do don't down
    each
    few from for
    get got great
    had has have he her here his him himself hers how
    i if i'm in into is it it's
    just
    like
    made me more most my
    no not
    of off on once one only or other our out over own
    said she should so some such
    than that the their them then there these they this those through to too
    under until up
    very
    was wasn't we were we're what when where which while who why will with would wouldn't
    you your
  ].freeze

  # Counts how often each word occurs in +text+, ignoring COMMON_WORDS.
  #
  # Only ASCII letters and apostrophes are treated as word characters; every
  # other non-whitespace character (digits, punctuation, non-ASCII letters)
  # acts as a word separator. Matching is case-insensitive and the reported
  # words are lower-cased.
  #
  # text - the text to analyse; anything responding to +to_s+ is accepted and
  #        +nil+ is treated as an empty string.
  #
  # Returns an Array of [word, count] pairs, highest count first. Words with
  # equal counts keep the order in which they first appeared in +text+.
  def self.common_words(text = "")
    counts = Hash.new(0)

    text.to_s.gsub(/[^a-zA-Z'\s]/, " ").split(" ").each do |token|
      # Keep inner apostrophes ("don't") but drop quoting ones ("'word'"),
      # so a quoted word and its bare form are counted together and a lone
      # apostrophe is not reported as a word.
      word = token.downcase.gsub(/\A'+|'+\z/, "")
      next if word.empty? || COMMON_WORDS.include?(word)

      counts[word] += 1
    end

    # sort_by is not stable, so the insertion position is used as an explicit
    # tie-breaker to make the result deterministic.
    ranked = counts.each_with_index.sort_by { |(_, count), position| [-count, position] }
    ranked.map { |pair, _| pair }
  end
end
|
data/spec/woccur_spec.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
path = File.expand_path(File.dirname(__FILE__) + "/../lib/")
|
2
|
+
$LOAD_PATH.unshift(path) unless $LOAD_PATH.include?(path)
|
3
|
+
require "woccur"
|
4
|
+
|
5
|
+
# Examples for the word counter, exercised through String#common_words
# (presumably added by woccur/core_ext, which is not visible here).
describe Woccur do
  it "should ignore common words" do
    # The sentence contains "A" and "a"; no pair keyed "a" may be reported.
    pairs = "A is a common word".common_words
    hits = pairs.select { |word, _count| word == "a" }
    hits.empty?.should be_true
  end

  it "should count words" do
    # "Second" and "second" differ only in case and must be counted together.
    pairs = "Second has been said for a second time".common_words
    hits = pairs.select { |word, _count| word == "second" }
    hits.first.last.should == 2
  end
end
|
data/woccur.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "woccur/version"
|
4
|
+
|
5
|
+
# Gem specification for woccur. The version is read from Woccur::VERSION,
# which is loaded by the require above this block.
Gem::Specification.new do |s|
  s.name        = "woccur"
  s.version     = Woccur::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Sam Knight"]
  s.email       = ["sam@samknight.co.uk"]
  # NOTE(review): homepage is empty; fill in the project URL when one exists.
  s.homepage    = ""
  s.summary     = %q{Find the most common words in a string}
  s.description = %q{Find the most common words in a string}

  s.rubyforge_project = "woccur"

  # File lists are taken from git, so building the gem requires a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # rspec is declared as a development-only dependency.
  s.add_development_dependency "rspec", "~>2.6"
end
|
metadata
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: woccur
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.1
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Sam Knight
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2011-11-23 00:00:00 +00:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: rspec
|
18
|
+
prerelease: false
|
19
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
|
+
none: false
|
21
|
+
requirements:
|
22
|
+
- - ~>
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: "2.6"
|
25
|
+
type: :development
|
26
|
+
version_requirements: *id001
|
27
|
+
description: Find the most commons words in a string
|
28
|
+
email:
|
29
|
+
- sam@samknight.co.uk
|
30
|
+
executables: []
|
31
|
+
|
32
|
+
extensions: []
|
33
|
+
|
34
|
+
extra_rdoc_files: []
|
35
|
+
|
36
|
+
files:
|
37
|
+
- .gitignore
|
38
|
+
- Gemfile
|
39
|
+
- Rakefile
|
40
|
+
- lib/common.yml
|
41
|
+
- lib/woccur.rb
|
42
|
+
- lib/woccur/core_ext.rb
|
43
|
+
- lib/woccur/version.rb
|
44
|
+
- spec/woccur_spec.rb
|
45
|
+
- woccur.gemspec
|
46
|
+
has_rdoc: true
|
47
|
+
homepage: ""
|
48
|
+
licenses: []
|
49
|
+
|
50
|
+
post_install_message:
|
51
|
+
rdoc_options: []
|
52
|
+
|
53
|
+
require_paths:
|
54
|
+
- lib
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
none: false
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: "0"
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: "0"
|
67
|
+
requirements: []
|
68
|
+
|
69
|
+
rubyforge_project: woccur
|
70
|
+
rubygems_version: 1.5.0
|
71
|
+
signing_key:
|
72
|
+
specification_version: 3
|
73
|
+
summary: Find the most commons words in a string
|
74
|
+
test_files:
|
75
|
+
- spec/woccur_spec.rb
|