text_analysis 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2197f3109b941d4e689529a18d361dec6a8645f0
4
- data.tar.gz: b5e8a30a0007b3b3f07cc48e29a21675c2d29611
3
+ metadata.gz: 8bf0a077bb90faf47c6a8dbb4a390099b6128b4f
4
+ data.tar.gz: 080a37894e3d3118ebca46617eb3a75216a68490
5
5
  SHA512:
6
- metadata.gz: a8530ae5b4449823464ca4055f99ae0dba0803f13a5c0b13f114dd3d088df4bb5c658c68ee26a2a755a0a912e5f2fcae388c67caa3d4c759cb89a787057478f6
7
- data.tar.gz: 4d60333852d771621c0610f7370e9c8f4d4e7ae955ca231a64f4630962ad74e8c2e9fdd5ee25c6e7427febfae76e8d60a40104d3d8367623aec4abf4640d850f
6
+ metadata.gz: eed1848f7929c86359066a959a777cfac269e97f87aada48a8d1956c47d7cd985e5b3359e4508843610f4cfacd5b2b3aeedf6ffee0a06af601c831caafdb9652
7
+ data.tar.gz: c56d71c7d7c6cd2c949761bd1afa275c3a790e8a57defd0fc8048414bf36b65e0fae8d80898a4ddd68dd3cd7933912ed110d9c1d106b1c3ccceedef103bd3e5f
data/CHANGELOG.md ADDED
@@ -0,0 +1,11 @@
1
+ ## 0.2.0 (2016-07-22)
2
+
3
+ New features
4
+ * Move out of hash response into proper model and attributes
5
+ * Added stop_words_found
6
+ * Added most_common_words
7
+ * Added most_common_non_stop_words
8
+
9
+ ## 0.1.0 (2016-07-19)
10
+
11
+ * Initial commit
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- text_analysis (0.1.0)
4
+ text_analysis (0.2.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -17,6 +17,9 @@ GEM
17
17
  rspec-expectations (3.4.0)
18
18
  diff-lcs (>= 1.2.0, < 2.0)
19
19
  rspec-support (~> 3.4.0)
20
+ rspec-its (1.2.0)
21
+ rspec-core (>= 3.0.0)
22
+ rspec-expectations (>= 3.0.0)
20
23
  rspec-mocks (3.4.0)
21
24
  diff-lcs (>= 1.2.0, < 2.0)
22
25
  rspec-support (~> 3.4.0)
@@ -29,6 +32,7 @@ DEPENDENCIES
29
32
  bundler (~> 1.10)
30
33
  rake (~> 10.0)
31
34
  rspec
35
+ rspec-its
32
36
  text_analysis!
33
37
 
34
38
  BUNDLED WITH
data/data/stop_words/en.txt ADDED
@@ -0,0 +1,182 @@
1
+ i
2
+ me
3
+ my
4
+ myself
5
+ we
6
+ our
7
+ ours
8
+ ourselves
9
+ you
10
+ your
11
+ yours
12
+ yourself
13
+ yourselves
14
+ he
15
+ him
16
+ his
17
+ himself
18
+ she
19
+ her
20
+ hers
21
+ herself
22
+ it
23
+ its
24
+ itself
25
+ they
26
+ them
27
+ their
28
+ theirs
29
+ themselves
30
+ what
31
+ which
32
+ who
33
+ whom
34
+ this
35
+ that
36
+ these
37
+ those
38
+ am
39
+ is
40
+ are
41
+ was
42
+ were
43
+ be
44
+ been
45
+ being
46
+ have
47
+ has
48
+ had
49
+ having
50
+ do
51
+ does
52
+ did
53
+ doing
54
+ would
55
+ should
56
+ could
57
+ ought
58
+ i'm
59
+ you're
60
+ he's
61
+ she's
62
+ it's
63
+ we're
64
+ they're
65
+ i've
66
+ you've
67
+ we've
68
+ they've
69
+ i'd
70
+ you'd
71
+ he'd
72
+ she'd
73
+ we'd
74
+ they'd
75
+ i'll
76
+ you'll
77
+ he'll
78
+ she'll
79
+ we'll
80
+ they'll
81
+ isn't
82
+ aren't
83
+ wasn't
84
+ weren't
85
+ hasn't
86
+ haven't
87
+ hadn't
88
+ doesn't
89
+ don't
90
+ didn't
91
+ won't
92
+ wouldn't
93
+ shan't
94
+ shouldn't
95
+ can't
96
+ cannot
97
+ couldn't
98
+ mustn't
99
+ let's
100
+ that's
101
+ who's
102
+ what's
103
+ here's
104
+ there's
105
+ when's
106
+ where's
107
+ why's
108
+ how's
109
+ a
110
+ an
111
+ the
112
+ and
113
+ but
114
+ if
115
+ or
116
+ because
117
+ as
118
+ until
119
+ while
120
+ of
121
+ at
122
+ by
123
+ for
124
+ with
125
+ about
126
+ against
127
+ between
128
+ into
129
+ through
130
+ during
131
+ before
132
+ after
133
+ above
134
+ below
135
+ to
136
+ from
137
+ up
138
+ down
139
+ in
140
+ out
141
+ on
142
+ off
143
+ over
144
+ under
145
+ again
146
+ further
147
+ then
148
+ once
149
+ here
150
+ there
151
+ when
152
+ where
153
+ why
154
+ how
155
+ all
156
+ any
157
+ both
158
+ each
159
+ few
160
+ more
161
+ most
162
+ other
163
+ some
164
+ such
165
+ no
166
+ nor
167
+ not
168
+ only
169
+ same
170
+ so
171
+ than
172
+ too
173
+ very
174
+ still
175
+ much
176
+ many
177
+ per
178
+ yet
179
+ sure
180
+ ok
181
+ now
182
+ might
data/lib/text_analysis/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module TextAnalysis
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/text_analysis.rb CHANGED
@@ -2,12 +2,39 @@ require "text_analysis/version"
2
2
 
3
3
  module TextAnalysis
4
4
  def self.analyze_text(text_input)
5
- hash = {}
5
+ result = Result.new
6
6
 
7
- hash[:total_characters] = text_input.length
8
- hash[:total_characters_without_whitespaces] = text_input.gsub(/\s+/, "").length
9
- hash[:total_words] = text_input.split(/[\w-]+/).size
7
+ stop_words = File.readlines('data/stop_words/en.txt').map { |word| word.gsub("\n", '') }.inspect
8
+ input_words = text_input.split(/[\s]+/)
10
9
 
11
- hash
10
+ result.total_characters = text_input.length
11
+ result.total_characters_without_whitespaces = text_input.gsub(/\s+/, "").length
12
+ result.total_words = input_words.size
13
+ result.stop_words_found = input_words.select { |word| stop_words.include? word.downcase }.uniq
14
+ result.most_common_words =
15
+ input_words.
16
+ group_by { |word| word.downcase }.
17
+ map { |k,v| { :word => k, :occurences => v.size } }.
18
+ sort_by { |hash| hash[:occurences] }.
19
+ reverse
20
+
21
+ result.most_common_non_stop_words =
22
+ input_words.
23
+ reject { |word| stop_words.include? word.downcase }.
24
+ group_by { |word| word.downcase }.
25
+ map { |k,v| { :word => k, :occurences => v.size } }.
26
+ sort_by { |hash| hash[:occurences] }.
27
+ reverse
28
+
29
+ result
30
+ end
31
+
32
+ class Result
33
+ attr_accessor :total_characters,
34
+ :total_characters_without_whitespaces,
35
+ :total_words,
36
+ :stop_words_found,
37
+ :most_common_words,
38
+ :most_common_non_stop_words
12
39
  end
13
40
  end
data/text_analysis.gemspec CHANGED
@@ -22,4 +22,5 @@ Gem::Specification.new do |spec|
22
22
  spec.add_development_dependency "bundler", "~> 1.10"
23
23
  spec.add_development_dependency "rake", "~> 10.0"
24
24
  spec.add_development_dependency "rspec"
25
+ spec.add_development_dependency "rspec-its"
25
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_analysis
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Vasilis Kalligas
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-07-18 00:00:00.000000000 Z
11
+ date: 2016-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec-its
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  description: TextAnalysis is a gem that given a text input, can produce a list of
56
70
  text analysis information such as word and character count.
57
71
  email:
@@ -63,6 +77,7 @@ files:
63
77
  - ".gitignore"
64
78
  - ".rspec"
65
79
  - ".travis.yml"
80
+ - CHANGELOG.md
66
81
  - CODE_OF_CONDUCT.md
67
82
  - Gemfile
68
83
  - Gemfile.lock
@@ -72,6 +87,7 @@ files:
72
87
  - Rakefile
73
88
  - bin/console
74
89
  - bin/setup
90
+ - data/stop_words/en.txt
75
91
  - lib/text_analysis.rb
76
92
  - lib/text_analysis/version.rb
77
93
  - text_analysis.gemspec