text_analysis 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2197f3109b941d4e689529a18d361dec6a8645f0
4
- data.tar.gz: b5e8a30a0007b3b3f07cc48e29a21675c2d29611
3
+ metadata.gz: 8bf0a077bb90faf47c6a8dbb4a390099b6128b4f
4
+ data.tar.gz: 080a37894e3d3118ebca46617eb3a75216a68490
5
5
  SHA512:
6
- metadata.gz: a8530ae5b4449823464ca4055f99ae0dba0803f13a5c0b13f114dd3d088df4bb5c658c68ee26a2a755a0a912e5f2fcae388c67caa3d4c759cb89a787057478f6
7
- data.tar.gz: 4d60333852d771621c0610f7370e9c8f4d4e7ae955ca231a64f4630962ad74e8c2e9fdd5ee25c6e7427febfae76e8d60a40104d3d8367623aec4abf4640d850f
6
+ metadata.gz: eed1848f7929c86359066a959a777cfac269e97f87aada48a8d1956c47d7cd985e5b3359e4508843610f4cfacd5b2b3aeedf6ffee0a06af601c831caafdb9652
7
+ data.tar.gz: c56d71c7d7c6cd2c949761bd1afa275c3a790e8a57defd0fc8048414bf36b65e0fae8d80898a4ddd68dd3cd7933912ed110d9c1d106b1c3ccceedef103bd3e5f
data/CHANGELOG.md ADDED
@@ -0,0 +1,11 @@
1
+ ## 0.2.0 (2016-07-22)
2
+
3
+ New features
4
+ * Move out of hash response into proper model and attributes
5
+ * Added stop_words_found
6
+ * Added most_common_words
7
+ * Added most_common_non_stop_words
8
+
9
+ ## 0.1.0 (2016-07-19)
10
+
11
+ * Initial commit
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- text_analysis (0.1.0)
4
+ text_analysis (0.2.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -17,6 +17,9 @@ GEM
17
17
  rspec-expectations (3.4.0)
18
18
  diff-lcs (>= 1.2.0, < 2.0)
19
19
  rspec-support (~> 3.4.0)
20
+ rspec-its (1.2.0)
21
+ rspec-core (>= 3.0.0)
22
+ rspec-expectations (>= 3.0.0)
20
23
  rspec-mocks (3.4.0)
21
24
  diff-lcs (>= 1.2.0, < 2.0)
22
25
  rspec-support (~> 3.4.0)
@@ -29,6 +32,7 @@ DEPENDENCIES
29
32
  bundler (~> 1.10)
30
33
  rake (~> 10.0)
31
34
  rspec
35
+ rspec-its
32
36
  text_analysis!
33
37
 
34
38
  BUNDLED WITH
data/data/stop_words/en.txt ADDED
@@ -0,0 +1,182 @@
1
+ i
2
+ me
3
+ my
4
+ myself
5
+ we
6
+ our
7
+ ours
8
+ ourselves
9
+ you
10
+ your
11
+ yours
12
+ yourself
13
+ yourselves
14
+ he
15
+ him
16
+ his
17
+ himself
18
+ she
19
+ her
20
+ hers
21
+ herself
22
+ it
23
+ its
24
+ itself
25
+ they
26
+ them
27
+ their
28
+ theirs
29
+ themselves
30
+ what
31
+ which
32
+ who
33
+ whom
34
+ this
35
+ that
36
+ these
37
+ those
38
+ am
39
+ is
40
+ are
41
+ was
42
+ were
43
+ be
44
+ been
45
+ being
46
+ have
47
+ has
48
+ had
49
+ having
50
+ do
51
+ does
52
+ did
53
+ doing
54
+ would
55
+ should
56
+ could
57
+ ought
58
+ i'm
59
+ you're
60
+ he's
61
+ she's
62
+ it's
63
+ we're
64
+ they're
65
+ i've
66
+ you've
67
+ we've
68
+ they've
69
+ i'd
70
+ you'd
71
+ he'd
72
+ she'd
73
+ we'd
74
+ they'd
75
+ i'll
76
+ you'll
77
+ he'll
78
+ she'll
79
+ we'll
80
+ they'll
81
+ isn't
82
+ aren't
83
+ wasn't
84
+ weren't
85
+ hasn't
86
+ haven't
87
+ hadn't
88
+ doesn't
89
+ don't
90
+ didn't
91
+ won't
92
+ wouldn't
93
+ shan't
94
+ shouldn't
95
+ can't
96
+ cannot
97
+ couldn't
98
+ mustn't
99
+ let's
100
+ that's
101
+ who's
102
+ what's
103
+ here's
104
+ there's
105
+ when's
106
+ where's
107
+ why's
108
+ how's
109
+ a
110
+ an
111
+ the
112
+ and
113
+ but
114
+ if
115
+ or
116
+ because
117
+ as
118
+ until
119
+ while
120
+ of
121
+ at
122
+ by
123
+ for
124
+ with
125
+ about
126
+ against
127
+ between
128
+ into
129
+ through
130
+ during
131
+ before
132
+ after
133
+ above
134
+ below
135
+ to
136
+ from
137
+ up
138
+ down
139
+ in
140
+ out
141
+ on
142
+ off
143
+ over
144
+ under
145
+ again
146
+ further
147
+ then
148
+ once
149
+ here
150
+ there
151
+ when
152
+ where
153
+ why
154
+ how
155
+ all
156
+ any
157
+ both
158
+ each
159
+ few
160
+ more
161
+ most
162
+ other
163
+ some
164
+ such
165
+ no
166
+ nor
167
+ not
168
+ only
169
+ same
170
+ so
171
+ than
172
+ too
173
+ very
174
+ still
175
+ much
176
+ many
177
+ per
178
+ yet
179
+ sure
180
+ ok
181
+ now
182
+ might
data/lib/text_analysis/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module TextAnalysis
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/text_analysis.rb CHANGED
@@ -2,12 +2,39 @@ require "text_analysis/version"
2
2
 
3
3
  module TextAnalysis
4
4
  def self.analyze_text(text_input)
5
- hash = {}
5
+ result = Result.new
6
6
 
7
- hash[:total_characters] = text_input.length
8
- hash[:total_characters_without_whitespaces] = text_input.gsub(/\s+/, "").length
9
- hash[:total_words] = text_input.split(/[\w-]+/).size
7
+ stop_words = File.readlines('data/stop_words/en.txt').map { |word| word.gsub("\n", '') }.inspect
8
+ input_words = text_input.split(/[\s]+/)
10
9
 
11
- hash
10
+ result.total_characters = text_input.length
11
+ result.total_characters_without_whitespaces = text_input.gsub(/\s+/, "").length
12
+ result.total_words = input_words.size
13
+ result.stop_words_found = input_words.select { |word| stop_words.include? word.downcase }.uniq
14
+ result.most_common_words =
15
+ input_words.
16
+ group_by { |word| word.downcase }.
17
+ map { |k,v| { :word => k, :occurences => v.size } }.
18
+ sort_by { |hash| hash[:occurences] }.
19
+ reverse
20
+
21
+ result.most_common_non_stop_words =
22
+ input_words.
23
+ reject { |word| stop_words.include? word.downcase }.
24
+ group_by { |word| word.downcase }.
25
+ map { |k,v| { :word => k, :occurences => v.size } }.
26
+ sort_by { |hash| hash[:occurences] }.
27
+ reverse
28
+
29
+ result
30
+ end
31
+
32
+ class Result
33
+ attr_accessor :total_characters,
34
+ :total_characters_without_whitespaces,
35
+ :total_words,
36
+ :stop_words_found,
37
+ :most_common_words,
38
+ :most_common_non_stop_words
12
39
  end
13
40
  end
data/text_analysis.gemspec CHANGED
@@ -22,4 +22,5 @@ Gem::Specification.new do |spec|
22
22
  spec.add_development_dependency "bundler", "~> 1.10"
23
23
  spec.add_development_dependency "rake", "~> 10.0"
24
24
  spec.add_development_dependency "rspec"
25
+ spec.add_development_dependency "rspec-its"
25
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_analysis
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Vasilis Kalligas
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-07-18 00:00:00.000000000 Z
11
+ date: 2016-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec-its
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  description: TextAnalysis is a gem that given a text input, can produce a list of
56
70
  text analysis information such as word and character count.
57
71
  email:
@@ -63,6 +77,7 @@ files:
63
77
  - ".gitignore"
64
78
  - ".rspec"
65
79
  - ".travis.yml"
80
+ - CHANGELOG.md
66
81
  - CODE_OF_CONDUCT.md
67
82
  - Gemfile
68
83
  - Gemfile.lock
@@ -72,6 +87,7 @@ files:
72
87
  - Rakefile
73
88
  - bin/console
74
89
  - bin/setup
90
+ - data/stop_words/en.txt
75
91
  - lib/text_analysis.rb
76
92
  - lib/text_analysis/version.rb
77
93
  - text_analysis.gemspec