flashtext 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d8ca3508b7355433ef41fc40eead394cdc2149d2
4
- data.tar.gz: 3715210544ee9170cf9c559ab10193b016e63615
3
+ metadata.gz: 869b2bf065283da58b746046bc45317ca9a47526
4
+ data.tar.gz: 0111edfd1206e610a0a8727181fb98abd3bfc5c9
5
5
  SHA512:
6
- metadata.gz: 2af1340ae2088f1327f689a0fc9c6036758f85571c3fbe6045b89593d2e5a45df8c5d2bcf705c51aa8fa69aad24504f71edd604454fd7cccfa485a31a605669f
7
- data.tar.gz: 0daa3bc37c6820204950c02f6766d2e11ab3850ebfaca8abd75582aa0453a0a2f6df59d2bdf19bd83e4108597f48caec8cb7c224cfc4a5295a106a57c21ac735
6
+ metadata.gz: e66b94f29f8912987b4411953b33cb470163a4c6c9409778941b6f4f4722c8305eceb8fbef8b07dca0fcbf4c1f4a12dd767305a4b32b2cba2909815b7a994542
7
+ data.tar.gz: 62029dadbaca46ebc761596a7a683764dd0d8ff9b89f475787d3288785d86be8fb432eb4b2bb91b8966636c0947b335595169a25f05e1bad503283c8154f0978
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- flashtext (0.1.0)
4
+ flashtext (0.1.2)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -4,8 +4,7 @@
4
4
 
5
5
  This module can be used to replace keywords in sentences or extract keywords from sentences. It is based on the [FlashText algorithm](https://arxiv.org/abs/1711.00046)
6
6
 
7
- More about Flashtext algorithm can be found here.
8
-
7
+ #### More about the FlashText algorithm
9
8
  The original paper published on [FlashText algorithm](https://arxiv.org/abs/1711.00046)
10
9
 
11
10
  The article published on [Medium freeCodeCamp](https://medium.freecodecamp.org/regex-was-taking-5-days-flashtext-does-it-in-15-minutes-55f04411025f)
@@ -22,6 +21,41 @@ API doc
22
21
  Documentation can be found at [FlashText on RubyDoc.info](http://www.rubydoc.info/gems/flashtext/)
23
22
 
24
23
  ## Usage
24
+ #### Extract keywords
25
+ ```ruby
26
+ keyword_processor = Flashtext::KeywordProcessor.new
27
+ # keyword_processor.add_keyword(<unclean name>, <standardised name>)
28
+ keyword_processor.add_keyword('Big Apple', 'New York')
29
+ keyword_processor.add_keyword('Bay Area')
30
+ keywords_found = keyword_processor.extract_keywords('I love Big Apple and Bay Area.')
31
+ keywords_found
32
+ #=> ["New York", "Bay Area"]
33
+ ```
34
+
35
+ #### Replace keywords
36
+ ```ruby
37
+ keyword_processor.add_keyword('New Delhi', 'NCR region')
38
+ new_sentence = keyword_processor.replace_keywords('I love Big Apple and new delhi.')
39
+ new_sentence
40
+ #=> "I love New York and NCR region."
41
+ ```
42
+
43
+ #### Case Sensitive example
44
+ ```ruby
45
+ keyword_processor = Flashtext::KeywordProcessor.new(true) # case_sensitive = true
46
+ keyword_processor.add_keyword('Big Apple', 'New York')
47
+ keyword_processor.add_keyword('Bay Area')
48
+ keywords_found = keyword_processor.extract_keywords('I love big Apple and Bay Area.')
49
+ keywords_found
50
+ #=> ["Bay Area"]
51
+ ```
52
+
53
+
54
+ Test
55
+ ----------
56
+ ```sh
57
+ rspec spec
58
+ ```
25
59
 
26
60
  Contribute
27
61
  ----------
@@ -29,6 +63,13 @@ Contribute
29
63
  - Issue Tracker: https://github.com/imran3180/flashtext/issues
30
64
  - Source Code: https://github.com/imran3180/flashtext
31
65
 
66
+ Implementation in other languages
67
+ ---------------------------------
68
+
69
+ - Python: https://github.com/vi3k6i5/flashtext (Core Project)
70
+ - JavaScript: https://github.com/drenther/flashtext.js
71
+ - Golang: https://github.com/sundy-li/flashtext
72
+
32
73
 
33
74
  ## License
34
75
 
@@ -130,5 +130,116 @@ module Flashtext
130
130
  end
131
131
  keywords_extracted
132
132
  end
133
+
134
# Replaces every keyword found in +sentence+ with the value stored for it in
# the keyword trie and returns the rewritten sentence.
#
# Matching walks +keyword_trie_hash+ one character at a time. When
# +case_sensitive+ is false the walk uses a downcased copy of the sentence,
# while the text copied to the output is always taken from the original
# sentence so that unmatched parts keep their casing. When several keywords
# start at the same position, the longest match wins.
#
# @param sentence [String, nil] text to scan
# @return [String, nil] the sentence with keywords replaced; +nil+ or an
#   empty string is returned unchanged
def replace_keywords sentence
  return sentence if sentence.nil? || sentence.empty?

  new_sentence = ""
  original_sentence = sentence
  # NOTE(review): the index-parallel walk below assumes downcasing does not
  # change the string length -- confirm for non-ASCII input.
  sentence = sentence.downcase if not case_sensitive
  current_word = ""                 # original text buffered since the last flush
  current_hash = keyword_trie_hash  # current node in the trie
  current_white_space = ""          # boundary character to emit after a match
  sequence_end_pos = 0
  idx = 0
  sentence_len = sentence.length

  while idx < sentence_len
    char = sentence[idx]
    current_word += original_sentence[idx]

    # A character that is NOT a member of word_boundaries ends the current word.
    if not word_boundaries.member?(char)
      current_white_space = char
      if current_hash.has_key?(_keyword) or current_hash.has_key?(char)
        longest_sequence_found = nil
        is_longer_seq_found = false

        # A complete keyword ends right before this boundary character.
        if current_hash.has_key?(_keyword)
          longest_sequence_found = current_hash[_keyword]
          sequence_end_pos = idx
        end

        # The trie continues through the boundary character (multi-word
        # keyword): look ahead for a longer match starting from here.
        if current_hash.has_key?(char)
          current_hash_continued = current_hash[char]
          current_word_continued = current_word
          idy = idx + 1
          while idy < sentence_len
            inner_char = sentence[idy]
            current_word_continued += original_sentence[idy]
            if !word_boundaries.member?(inner_char) and current_hash_continued.has_key?(_keyword)
              # Longer keyword ends right before this boundary character.
              current_white_space = inner_char
              longest_sequence_found = current_hash_continued[_keyword]
              sequence_end_pos = idy
              is_longer_seq_found = true
            end
            if current_hash_continued.has_key?(inner_char)
              current_hash_continued = current_hash_continued[inner_char]
            else
              break
            end
            idy += 1
          end
          if idy == sentence_len # end of sentence reached.
            if current_hash_continued.member?(_keyword)
              # Longer keyword ends exactly at the end of the sentence, so
              # there is no trailing boundary character to emit.
              current_white_space = ""
              longest_sequence_found = current_hash_continued[_keyword]
              sequence_end_pos = idy
              is_longer_seq_found = true
            end
          end
          if is_longer_seq_found
            # Jump the outer scan to the end of the longer match.
            idx = sequence_end_pos
            current_word = current_word_continued
          end
        end

        current_hash = keyword_trie_hash
        if longest_sequence_found
          new_sentence += (longest_sequence_found + current_white_space)
        else
          new_sentence += current_word
        end
        current_word = ''
        current_white_space = ''
      else
        # No keyword ends here and the trie cannot continue: flush and reset.
        current_hash = keyword_trie_hash
        new_sentence += current_word
        current_word = ''
        current_white_space = ''
      end
    elsif current_hash.has_key?(char)
      # We can continue from this char; current_word keeps buffering.
      current_hash = current_hash[char]
    else
      # Dead end inside a word: reset the trie and copy the rest of the word
      # (including the boundary character that ends it) verbatim.
      current_hash = keyword_trie_hash
      idy = idx + 1
      while idy < sentence_len
        char = sentence[idy]
        current_word += original_sentence[idy]
        break if not word_boundaries.member?(char)
        idy += 1
      end
      idx = idy
      new_sentence += current_word
      current_word = ""
      current_white_space = ""
    end

    # End of sentence: emit a keyword that ends on the last character, or
    # otherwise flush the buffered text. Without the flush, a sentence ending
    # in a partial trie match (e.g. "I love Big" with keyword "Big Apple")
    # would lose its trailing word. In the other branches current_word is
    # already empty here, so nothing is appended twice.
    if idx + 1 >= sentence_len
      if current_hash.has_key?(_keyword)
        new_sentence += current_hash[_keyword]
      else
        new_sentence += current_word
      end
    end
    idx = idx + 1 # loop increment
  end
  return new_sentence
end
133
244
  end
134
245
  end
@@ -1,3 +1,3 @@
1
1
  module Flashtext
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: flashtext
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Imran
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-11-18 00:00:00.000000000 Z
11
+ date: 2018-01-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler