flashtext 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d8ca3508b7355433ef41fc40eead394cdc2149d2
4
- data.tar.gz: 3715210544ee9170cf9c559ab10193b016e63615
3
+ metadata.gz: 869b2bf065283da58b746046bc45317ca9a47526
4
+ data.tar.gz: 0111edfd1206e610a0a8727181fb98abd3bfc5c9
5
5
  SHA512:
6
- metadata.gz: 2af1340ae2088f1327f689a0fc9c6036758f85571c3fbe6045b89593d2e5a45df8c5d2bcf705c51aa8fa69aad24504f71edd604454fd7cccfa485a31a605669f
7
- data.tar.gz: 0daa3bc37c6820204950c02f6766d2e11ab3850ebfaca8abd75582aa0453a0a2f6df59d2bdf19bd83e4108597f48caec8cb7c224cfc4a5295a106a57c21ac735
6
+ metadata.gz: e66b94f29f8912987b4411953b33cb470163a4c6c9409778941b6f4f4722c8305eceb8fbef8b07dca0fcbf4c1f4a12dd767305a4b32b2cba2909815b7a994542
7
+ data.tar.gz: 62029dadbaca46ebc761596a7a683764dd0d8ff9b89f475787d3288785d86be8fb432eb4b2bb91b8966636c0947b335595169a25f05e1bad503283c8154f0978
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- flashtext (0.1.0)
4
+ flashtext (0.1.2)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -4,8 +4,7 @@
4
4
 
5
5
  This module can be used to replace keywords in sentences or extract keywords from sentences. It is based on the [FlashText algorithm](https://arxiv.org/abs/1711.00046)
6
6
 
7
- More about Flashtext algorithm can be found here.
8
-
7
+ #### More about the FlashText algorithm
9
8
  The original paper published on the [FlashText algorithm](https://arxiv.org/abs/1711.00046)
10
9
 
11
10
  The article published on [Medium freeCodeCamp](https://medium.freecodecamp.org/regex-was-taking-5-days-flashtext-does-it-in-15-minutes-55f04411025f)
@@ -22,6 +21,41 @@ API doc
22
21
  Documentation can be found at [FlashText on RubyDoc.info](http://www.rubydoc.info/gems/flashtext/)
23
22
 
24
23
  ## Usage
24
+ #### Extract keywords
25
+ ```ruby
26
+ keyword_processor = Flashtext::KeywordProcessor.new
27
+ # keyword_processor.add_keyword(<unclean name>, <standardised name>)
28
+ keyword_processor.add_keyword('Big Apple', 'New York')
29
+ keyword_processor.add_keyword('Bay Area')
30
+ keywords_found = keyword_processor.extract_keywords('I love Big Apple and Bay Area.')
31
+ keywords_found
32
+ #=> ["New York", "Bay Area"]
33
+ ```
34
+
35
+ #### Replace keywords
36
+ ```ruby
37
+ keyword_processor.add_keyword('New Delhi', 'NCR region')
38
+ new_sentence = keyword_processor.replace_keywords('I love Big Apple and new delhi.')
39
+ new_sentence
40
+ #=> "I love New York and NCR region."
41
+ ```
42
+
43
+ #### Case Sensitive example
44
+ ```ruby
45
+ keyword_processor = Flashtext::KeywordProcessor.new(case_sensitive = true)
46
+ keyword_processor.add_keyword('Big Apple', 'New York')
47
+ keyword_processor.add_keyword('Bay Area')
48
+ keywords_found = keyword_processor.extract_keywords('I love big Apple and Bay Area.')
49
+ keywords_found
50
+ #=> ["Bay Area"]
51
+ ```
52
+
53
+
54
+ Test
55
+ ----------
56
+ ```sh
57
+ rspec spec
58
+ ```
25
59
 
26
60
  Contribute
27
61
  ----------
@@ -29,6 +63,13 @@ Contribute
29
63
  - Issue Tracker: https://github.com/imran3180/flashtext/issues
30
64
  - Source Code: https://github.com/imran3180/flashtext
31
65
 
66
+ Implementations in other languages
67
+ ---------------------------------
68
+
69
+ - Python: https://github.com/vi3k6i5/flashtext (Core Project)
70
+ - JavaScript: https://github.com/drenther/flashtext.js
71
+ - Golang: https://github.com/sundy-li/flashtext
72
+
32
73
 
33
74
  ## License
34
75
 
@@ -130,5 +130,116 @@ module Flashtext
130
130
  end
131
131
  keywords_extracted
132
132
  end
133
+
134
+ def replace_keywords sentence
135
+ if sentence.nil? || sentence.empty?
136
+ return sentence
137
+ end
138
+ new_sentence = ""
139
+ original_sentence = sentence
140
+ sentence = sentence.downcase if not case_sensitive
141
+ current_word = ""
142
+ current_hash = keyword_trie_hash
143
+ current_white_space = ""
144
+ sequence_end_pos = 0
145
+ idx = 0
146
+ sentence_len = sentence.length
147
+
148
+ while idx < sentence_len
149
+ char = sentence[idx]
150
+ current_word += original_sentence[idx]
151
+
152
+ if not word_boundaries.member?(char)
153
+ current_white_space = char
154
+ if current_hash.has_key?(_keyword) or current_hash.has_key?(char)
155
+ # update longest sequence found
156
+ sequence_found = nil
157
+ longest_sequence_found = nil
158
+ is_longer_seq_found = false
159
+ if current_hash.has_key?(_keyword)
160
+ sequence_found = current_hash[_keyword]
161
+ longest_sequence_found = current_hash[_keyword]
162
+ sequence_end_pos = idx
163
+ end
164
+
165
+ # re look for longest_sequence from this position
166
+ if current_hash.has_key?(char)
167
+ current_hash_continued = current_hash[char]
168
+ current_word_continued = current_word
169
+ idy = idx + 1
170
+ while idy < sentence_len
171
+ inner_char = sentence[idy]
172
+ current_word_continued += original_sentence[idy]
173
+ if !word_boundaries.member?(inner_char) and current_hash_continued.has_key?(_keyword)
174
+ # Update longest sequence found
175
+ current_white_space = inner_char
176
+ longest_sequence_found = current_hash_continued[_keyword]
177
+ sequence_end_pos = idy
178
+ is_longer_seq_found = true
179
+ end
180
+ if current_hash_continued.has_key?(inner_char)
181
+ current_hash_continued = current_hash_continued[inner_char]
182
+ else
183
+ break
184
+ end
185
+ idy += 1
186
+ end
187
+ if idy == sentence_len # end of sentence reached.
188
+ if current_hash_continued.member?(_keyword)
189
+ # update longest sequence found
190
+ current_white_space = ""
191
+ longest_sequence_found = current_hash_continued[_keyword]
192
+ sequence_end_pos = idy
193
+ is_longer_seq_found = true
194
+ end
195
+ end
196
+ if is_longer_seq_found
197
+ idx = sequence_end_pos
198
+ current_word = current_word_continued
199
+ end
200
+ end
201
+ current_hash = keyword_trie_hash
202
+ if longest_sequence_found
203
+ new_sentence += (longest_sequence_found + current_white_space)
204
+ current_word = ''
205
+ current_white_space = ''
206
+ else
207
+ new_sentence += current_word
208
+ current_word = ''
209
+ current_white_space = ''
210
+ end
211
+ else
212
+ # we reset current_hash
213
+ current_hash = keyword_trie_hash
214
+ new_sentence += current_word
215
+ current_word = ''
216
+ current_white_space = ''
217
+ end
218
+ elsif current_hash.has_key?(char)
219
+ # we can continue from this char
220
+ current_hash = current_hash[char]
221
+ else
222
+ # reset current_hash
223
+ current_hash = keyword_trie_hash
224
+ idy = idx + 1
225
+ while idy < sentence_len
226
+ char = sentence[idy]
227
+ current_word += original_sentence[idy]
228
+ break if not word_boundaries.member?(char)
229
+ idy += 1
230
+ end
231
+ idx = idy
232
+ new_sentence += current_word
233
+ current_word = ""
234
+ current_white_space = ""
235
+ end
236
+ if idx + 1 >= sentence_len && current_hash.has_key?(_keyword)
237
+ sequence_found = current_hash[_keyword]
238
+ new_sentence += sequence_found
239
+ end
240
+ idx = idx + 1 # loop increment
241
+ end
242
+ return new_sentence
243
+ end
133
244
  end
134
245
  end
@@ -1,3 +1,3 @@
1
1
  module Flashtext
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: flashtext
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Imran
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-11-18 00:00:00.000000000 Z
11
+ date: 2018-01-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler