text_analysis 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Gemfile.lock +5 -1
- data/data/stop_words/en.txt +182 -0
- data/lib/text_analysis/version.rb +1 -1
- data/lib/text_analysis.rb +32 -5
- data/text_analysis.gemspec +1 -0
- metadata +18 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8bf0a077bb90faf47c6a8dbb4a390099b6128b4f
|
4
|
+
data.tar.gz: 080a37894e3d3118ebca46617eb3a75216a68490
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eed1848f7929c86359066a959a777cfac269e97f87aada48a8d1956c47d7cd985e5b3359e4508843610f4cfacd5b2b3aeedf6ffee0a06af601c831caafdb9652
|
7
|
+
data.tar.gz: c56d71c7d7c6cd2c949761bd1afa275c3a790e8a57defd0fc8048414bf36b65e0fae8d80898a4ddd68dd3cd7933912ed110d9c1d106b1c3ccceedef103bd3e5f
|
data/CHANGELOG.md
ADDED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
text_analysis (0.1.0)
|
4
|
+
text_analysis (0.2.0)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
@@ -17,6 +17,9 @@ GEM
|
|
17
17
|
rspec-expectations (3.4.0)
|
18
18
|
diff-lcs (>= 1.2.0, < 2.0)
|
19
19
|
rspec-support (~> 3.4.0)
|
20
|
+
rspec-its (1.2.0)
|
21
|
+
rspec-core (>= 3.0.0)
|
22
|
+
rspec-expectations (>= 3.0.0)
|
20
23
|
rspec-mocks (3.4.0)
|
21
24
|
diff-lcs (>= 1.2.0, < 2.0)
|
22
25
|
rspec-support (~> 3.4.0)
|
@@ -29,6 +32,7 @@ DEPENDENCIES
|
|
29
32
|
bundler (~> 1.10)
|
30
33
|
rake (~> 10.0)
|
31
34
|
rspec
|
35
|
+
rspec-its
|
32
36
|
text_analysis!
|
33
37
|
|
34
38
|
BUNDLED WITH
|
data/data/stop_words/en.txt
ADDED
@@ -0,0 +1,182 @@
|
|
1
|
+
i
|
2
|
+
me
|
3
|
+
my
|
4
|
+
myself
|
5
|
+
we
|
6
|
+
our
|
7
|
+
ours
|
8
|
+
ourselves
|
9
|
+
you
|
10
|
+
your
|
11
|
+
yours
|
12
|
+
yourself
|
13
|
+
yourselves
|
14
|
+
he
|
15
|
+
him
|
16
|
+
his
|
17
|
+
himself
|
18
|
+
she
|
19
|
+
her
|
20
|
+
hers
|
21
|
+
herself
|
22
|
+
it
|
23
|
+
its
|
24
|
+
itself
|
25
|
+
they
|
26
|
+
them
|
27
|
+
their
|
28
|
+
theirs
|
29
|
+
themselves
|
30
|
+
what
|
31
|
+
which
|
32
|
+
who
|
33
|
+
whom
|
34
|
+
this
|
35
|
+
that
|
36
|
+
these
|
37
|
+
those
|
38
|
+
am
|
39
|
+
is
|
40
|
+
are
|
41
|
+
was
|
42
|
+
were
|
43
|
+
be
|
44
|
+
been
|
45
|
+
being
|
46
|
+
have
|
47
|
+
has
|
48
|
+
had
|
49
|
+
having
|
50
|
+
do
|
51
|
+
does
|
52
|
+
did
|
53
|
+
doing
|
54
|
+
would
|
55
|
+
should
|
56
|
+
could
|
57
|
+
ought
|
58
|
+
i'm
|
59
|
+
you're
|
60
|
+
he's
|
61
|
+
she's
|
62
|
+
it's
|
63
|
+
we're
|
64
|
+
they're
|
65
|
+
i've
|
66
|
+
you've
|
67
|
+
we've
|
68
|
+
they've
|
69
|
+
i'd
|
70
|
+
you'd
|
71
|
+
he'd
|
72
|
+
she'd
|
73
|
+
we'd
|
74
|
+
they'd
|
75
|
+
i'll
|
76
|
+
you'll
|
77
|
+
he'll
|
78
|
+
she'll
|
79
|
+
we'll
|
80
|
+
they'll
|
81
|
+
isn't
|
82
|
+
aren't
|
83
|
+
wasn't
|
84
|
+
weren't
|
85
|
+
hasn't
|
86
|
+
haven't
|
87
|
+
hadn't
|
88
|
+
doesn't
|
89
|
+
don't
|
90
|
+
didn't
|
91
|
+
won't
|
92
|
+
wouldn't
|
93
|
+
shan't
|
94
|
+
shouldn't
|
95
|
+
can't
|
96
|
+
cannot
|
97
|
+
couldn't
|
98
|
+
mustn't
|
99
|
+
let's
|
100
|
+
that's
|
101
|
+
who's
|
102
|
+
what's
|
103
|
+
here's
|
104
|
+
there's
|
105
|
+
when's
|
106
|
+
where's
|
107
|
+
why's
|
108
|
+
how's
|
109
|
+
a
|
110
|
+
an
|
111
|
+
the
|
112
|
+
and
|
113
|
+
but
|
114
|
+
if
|
115
|
+
or
|
116
|
+
because
|
117
|
+
as
|
118
|
+
until
|
119
|
+
while
|
120
|
+
of
|
121
|
+
at
|
122
|
+
by
|
123
|
+
for
|
124
|
+
with
|
125
|
+
about
|
126
|
+
against
|
127
|
+
between
|
128
|
+
into
|
129
|
+
through
|
130
|
+
during
|
131
|
+
before
|
132
|
+
after
|
133
|
+
above
|
134
|
+
below
|
135
|
+
to
|
136
|
+
from
|
137
|
+
up
|
138
|
+
down
|
139
|
+
in
|
140
|
+
out
|
141
|
+
on
|
142
|
+
off
|
143
|
+
over
|
144
|
+
under
|
145
|
+
again
|
146
|
+
further
|
147
|
+
then
|
148
|
+
once
|
149
|
+
here
|
150
|
+
there
|
151
|
+
when
|
152
|
+
where
|
153
|
+
why
|
154
|
+
how
|
155
|
+
all
|
156
|
+
any
|
157
|
+
both
|
158
|
+
each
|
159
|
+
few
|
160
|
+
more
|
161
|
+
most
|
162
|
+
other
|
163
|
+
some
|
164
|
+
such
|
165
|
+
no
|
166
|
+
nor
|
167
|
+
not
|
168
|
+
only
|
169
|
+
same
|
170
|
+
so
|
171
|
+
than
|
172
|
+
too
|
173
|
+
very
|
174
|
+
still
|
175
|
+
much
|
176
|
+
many
|
177
|
+
per
|
178
|
+
yet
|
179
|
+
sure
|
180
|
+
ok
|
181
|
+
now
|
182
|
+
might
|
data/lib/text_analysis.rb
CHANGED
@@ -2,12 +2,39 @@ require "text_analysis/version"
|
|
2
2
|
|
3
3
|
module TextAnalysis
|
4
4
|
def self.analyze_text(text_input)
|
5
|
-
|
5
|
+
result = Result.new
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
hash[:total_words] = text_input.split(/[\w-]+/).size
|
7
|
+
stop_words = File.readlines('data/stop_words/en.txt').map { |word| word.gsub("\n", '') }.inspect
|
8
|
+
input_words = text_input.split(/[\s]+/)
|
10
9
|
|
11
|
-
|
10
|
+
result.total_characters = text_input.length
|
11
|
+
result.total_characters_without_whitespaces = text_input.gsub(/\s+/, "").length
|
12
|
+
result.total_words = input_words.size
|
13
|
+
result.stop_words_found = input_words.select { |word| stop_words.include? word.downcase }.uniq
|
14
|
+
result.most_common_words =
|
15
|
+
input_words.
|
16
|
+
group_by { |word| word.downcase }.
|
17
|
+
map { |k,v| { :word => k, :occurences => v.size } }.
|
18
|
+
sort_by { |hash| hash[:occurences] }.
|
19
|
+
reverse
|
20
|
+
|
21
|
+
result.most_common_non_stop_words =
|
22
|
+
input_words.
|
23
|
+
reject { |word| stop_words.include? word.downcase }.
|
24
|
+
group_by { |word| word.downcase }.
|
25
|
+
map { |k,v| { :word => k, :occurences => v.size } }.
|
26
|
+
sort_by { |hash| hash[:occurences] }.
|
27
|
+
reverse
|
28
|
+
|
29
|
+
result
|
30
|
+
end
|
31
|
+
|
32
|
+
class Result
|
33
|
+
attr_accessor :total_characters,
|
34
|
+
:total_characters_without_whitespaces,
|
35
|
+
:total_words,
|
36
|
+
:stop_words_found,
|
37
|
+
:most_common_words,
|
38
|
+
:most_common_non_stop_words
|
12
39
|
end
|
13
40
|
end
|
data/text_analysis.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_analysis
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Vasilis Kalligas
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-07-
|
11
|
+
date: 2016-07-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec-its
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
description: TextAnalysis is a gem that given a text input, can produce a list of
|
56
70
|
text analysis information such as word and character count.
|
57
71
|
email:
|
@@ -63,6 +77,7 @@ files:
|
|
63
77
|
- ".gitignore"
|
64
78
|
- ".rspec"
|
65
79
|
- ".travis.yml"
|
80
|
+
- CHANGELOG.md
|
66
81
|
- CODE_OF_CONDUCT.md
|
67
82
|
- Gemfile
|
68
83
|
- Gemfile.lock
|
@@ -72,6 +87,7 @@ files:
|
|
72
87
|
- Rakefile
|
73
88
|
- bin/console
|
74
89
|
- bin/setup
|
90
|
+
- data/stop_words/en.txt
|
75
91
|
- lib/text_analysis.rb
|
76
92
|
- lib/text_analysis/version.rb
|
77
93
|
- text_analysis.gemspec
|