bblib 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +292 -11
- data/bblib.gemspec +1 -1
- data/lib/bblib/version.rb +1 -1
- data/lib/file/bbfile.rb +9 -9
- data/lib/string/bbstring.rb +1 -1
- data/lib/string/fuzzy_matcher.rb +2 -2
- data/lib/string/matching.rb +21 -25
- data/lib/string/roman.rb +7 -6
- data/lib/time/bbtime.rb +15 -15
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f7b1504ef918ed9ec7c63df50cf267714f4fffed
|
4
|
+
data.tar.gz: b9627fa1e26c9932a4c0e8301134560a90e2caab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ad8d53750aa9b2842b63cccf814be382468f426f7360e41ff97ca37790a18790110e1f4cff275685982cf2e46b50957e1d8140c2a008eef8c32f695ca5cf11f4
|
7
|
+
data.tar.gz: e1841e600e35928460e7320800f39858e7e5d2cc2d2920e7d1afa4cbf9628160d1d49df9e2c43c2a385bf11d83de83824189039f98f020d91df6a63c1a882e36
|
data/README.md
CHANGED
@@ -32,10 +32,12 @@ BBLib is currently broken up into the following categories:
|
|
32
32
|
* String
|
33
33
|
* Time
|
34
34
|
|
35
|
+
|
36
|
+
|
35
37
|
### File
|
36
38
|
#### File Scanners
|
37
39
|
|
38
|
-
Various simple file scan methods are available. All of these are toggleable-recursive and can be passed filters using
|
40
|
+
Various simple file scan methods are available. All of these are toggleable-recursive and can be passed filters using any wildcarding supported by the Ruby Dir.glob() method.
|
39
41
|
|
40
42
|
```ruby
|
41
43
|
# Scan for any files or folders in a path
|
@@ -82,7 +84,6 @@ A file size parser is available that analyzes known patterns in a string to cons
|
|
82
84
|
```ruby
|
83
85
|
# Turn a string into a file size (in bytes)
|
84
86
|
BBLib.parse_file_size "1MB 100KB"
|
85
|
-
|
86
87
|
#=> 1150976.0
|
87
88
|
```
|
88
89
|
|
@@ -91,13 +92,10 @@ By default the output is in bytes, however, this can be modified using the named
|
|
91
92
|
```ruby
|
92
93
|
# Turn a string into a file size (in bytes)
|
93
94
|
BBLib.parse_file_size "1MB 100KB", output: :megabyte
|
94
|
-
|
95
95
|
#=> 1.09765625
|
96
96
|
|
97
97
|
# The method can also be called directly on a string
|
98
|
-
|
99
98
|
"1.5 Mb".parse_file_size output: :kilobyte
|
100
|
-
|
101
99
|
#=> 1536.0
|
102
100
|
```
|
103
101
|
|
@@ -134,29 +132,30 @@ BBLib.string_to_file '/home/user/my_file', string, false
|
|
134
132
|
string.to_file '/home/user/another_file', true
|
135
133
|
```
|
136
134
|
|
135
|
+
|
136
|
+
|
137
137
|
### Hash
|
138
138
|
|
139
139
|
#### Deep Merge
|
140
140
|
|
141
141
|
A simple implementation of a deep merge algorithm that merges two hashes including nested hashes within them. It can also merge arrays (default) within the hashes and merge values into arrays (not default) rather than overwriting the values with the right side hash.
|
142
142
|
|
143
|
+
Part of the code is based on information found @ http://stackoverflow.com/questions/9381553/ruby-merge-nested-hash
|
144
|
+
|
143
145
|
```ruby
|
144
146
|
h1 = ({value: 1231, array: [1, 2], hash: {a: 1, b_hash: {c: 2, d:3}}})
|
145
147
|
h2 = ({value: 5, array: [6, 7], hash: {a: 1, z: nil, b_hash: {c: 9, d:10, y:10}}})
|
146
148
|
|
147
149
|
# Default behavior merges arrays and overwrites non-array/hash values
|
148
150
|
h1.deep_merge h2
|
149
|
-
|
150
151
|
#=> {:value=>5, :array=>[1, 2, 6, 7], :hash=>{:a=>1, :b_hash=>{:c=>9, :d=>10, :y=>10}, :z=>nil}}
|
151
152
|
|
152
153
|
# Don't overwrite colliding values, instead, place them into an array together
|
153
154
|
h1.deep_merge h2, overwrite_vals: false
|
154
|
-
|
155
155
|
#=> {:value=>[1231, 5], :array=>[1, 2, 6, 7], :hash=>{:a=>[1, 1], :b_hash=>{:c=>[2, 9], :d=>[3, 10], :y=>10}, :z=>nil}}
|
156
156
|
|
157
157
|
# Don't merge arrays, instead, overwrite them.
|
158
158
|
h1.deep_merge h2, merge_arrays: false
|
159
|
-
|
160
159
|
#=> {:value=>5, :array=>[6, 7], :hash=>{:a=>1, :b_hash=>{:c=>9, :d=>10, :y=>10}, :z=>nil}}
|
161
160
|
```
|
162
161
|
|
@@ -169,32 +168,314 @@ Convert all keys within a hash (including nested keys) to symbols. This is usefu
|
|
169
168
|
```ruby
|
170
169
|
h = {"author" => "Tom Clancy", "books" => ["Rainbow Six", "The Hunt for Red October"]}
|
171
170
|
h.keys_to_sym
|
172
|
-
|
173
171
|
#=> {:author=>"Tom Clancy", :books=>["Rainbow Six", "The Hunt for Red October"]}
|
174
172
|
```
|
175
173
|
|
176
174
|
#### Reverse
|
177
175
|
|
178
|
-
Similar to reverse for Array. Calling this will reverse the current order of the Hash's keys. An
|
176
|
+
Similar to reverse for Array. Calling this will reverse the current order of the Hash's keys. An in place version is also available.
|
177
|
+
|
178
|
+
The code behind this is based on a method found @ http://stackoverflow.com/questions/800122/best-way-to-convert-strings-to-symbols-in-hash
|
179
179
|
|
180
180
|
```ruby
|
181
181
|
h = {a:1, b:2, c:3, d:4}
|
182
182
|
h.reverse
|
183
|
-
|
184
183
|
#=> {:d=>4, :c=>3, :b=>2, :a=>1}
|
185
184
|
```
|
186
185
|
|
186
|
+
|
187
|
+
|
187
188
|
### Math
|
188
189
|
|
190
|
+
#### Keep Between
|
191
|
+
|
192
|
+
Used to ensure a numeric value is kept within a set of bounds. The first argument is the number, the second is the minimum of the bounds and the third is the maximum. To specify no min or max, simply pass nil as either of the bounds.
|
193
|
+
|
194
|
+
```ruby
|
195
|
+
number = 17
|
196
|
+
BBLib.keep_between number, 0, 10
|
197
|
+
#=> 10
|
198
|
+
|
199
|
+
number = 0.145
|
200
|
+
BBLib.keep_between number, 0.5, 1
|
201
|
+
#=> 0.5
|
202
|
+
|
203
|
+
number = -250
|
204
|
+
BBLib.keep_between number, nil, 100
|
205
|
+
#=> -250
|
206
|
+
```
|
207
|
+
|
208
|
+
|
189
209
|
|
190
210
|
### Net
|
191
211
|
Currently empty...
|
192
212
|
|
213
|
+
|
214
|
+
|
193
215
|
### String
|
194
216
|
|
217
|
+
#### FuzzyMatcher
|
218
|
+
|
219
|
+
FuzzyMatcher (BBLib::FuzzyMatcher) is a class for making fuzzy comparisons with strings. It implements a weighted algorithm system which uses the algorithms listed below to generate a percentage based match between two strings. There are various settings that can be toggled in addition. These settings are:
|
220
|
+
|
221
|
+
* **Case Sensitive**: Toggles whether or not strings should be compared in a case sensitive manner.
|
222
|
+
* **Remove Symbols**: Toggle to remove all symbols from the strings before comparing them.
|
223
|
+
* **Move Articles**: Toggling this normalizes the position of preceding or trailing articles (the, an, a).
|
224
|
+
* **Convert Roman**: When toggled to true, all roman numerals found in the strings are converted to integers.
|
225
|
+
|
226
|
+
Current algorithms are:
|
227
|
+
* Levenshtein
|
228
|
+
* Composition
|
229
|
+
* Phrase
|
230
|
+
* Numeric
|
231
|
+
|
232
|
+
```ruby
|
233
|
+
# Create a FuzzyMatcher and set it to be case insensitive
|
234
|
+
fm = BBLib::FuzzyMatcher.new case_sensitive: false
|
235
|
+
|
236
|
+
# Set the weight of two of the algorithms. A weight of zero effectively turns off that algorithm.
|
237
|
+
fm.set_weight :levenshtein, 10
|
238
|
+
fm.set_weight :composition, 5
|
239
|
+
|
240
|
+
# Get similarity as a %
|
241
|
+
fm.similarity 'Ruby', 'Rails'
|
242
|
+
#=> 20.0
|
243
|
+
|
244
|
+
# Set the threshold match percent
|
245
|
+
fm.threshold = 50
|
246
|
+
# Returns true if the match percent is greater than or equal to the threshold
|
247
|
+
fm.match? 'Ruby', 'Rails'
|
248
|
+
#=> false
|
249
|
+
|
250
|
+
# Get the similarity of a string with an Array of strings. A hash is returned
|
251
|
+
# with the key being the string compared and the value being its match %
|
252
|
+
fm.similarities 'Ruby', ['Ruby', 'Rails', 'Java', 'C++']
|
253
|
+
#=> {"Ruby"=>100.0, "Rails"=>20.0, "Java"=>0.0, "C++"=>0.0}
|
254
|
+
|
255
|
+
# Compare a string to an Array of strings but return only the match with the highest comparison result
|
256
|
+
fm.best_match 'Ruby', ['Ruby', 'Rails', 'Java', 'C++']
|
257
|
+
#=> 'Ruby'
|
258
|
+
```
|
259
|
+
|
260
|
+
|
261
|
+
#### String Comparisons
|
262
|
+
|
263
|
+
**ALGORITHMS**
|
264
|
+
|
265
|
+
Implementations of the following algorithms are currently available. All algorithms are for calculating similarity between strings. Most are useful for fuzzy matching. All algorithms are available statically in the BBLib module but are also available as extensions to the String class. Most of these algorithms are case sensitive by default.
|
266
|
+
|
267
|
+
1 - Levenshtein Distance
|
268
|
+
|
269
|
+
A fairly simple rendition of the Levenshtein distance algorithm in Ruby. There are two functions available: **levenshtein_distance** and **levenshtein_similarity**. The former, calculates the number of additions, removals or substitutions needed to turn one string into another. The latter, uses the distance to calculate a percentage based match of two strings.
|
270
|
+
|
271
|
+
```ruby
|
272
|
+
# Get the Levenshtein distance of two strings
|
273
|
+
'Ruby is great'.levenshtein_distance 'Rails is great'
|
274
|
+
# OR
|
275
|
+
BBLib.levenshtein_distance 'Ruby is great', 'Rails is great'
|
276
|
+
#=> 4
|
277
|
+
|
278
|
+
# Or calculate the similarity as a percent
|
279
|
+
'Ruby is great'.levenshtein_similarity 'Rails is great'
|
280
|
+
#=> 71.42857142857143
|
281
|
+
```
|
282
|
+
|
283
|
+
2 - String Composition
|
284
|
+
|
285
|
+
Compares the character composition of two strings. The order of characters is not relevant, however, the number of occurrences is factored in.
|
286
|
+
|
287
|
+
```ruby
|
288
|
+
'Ruby is great'.composition_similarity 'Rails is great'
|
289
|
+
#=> 71.42857142857143
|
290
|
+
```
|
291
|
+
|
292
|
+
3 - Phrase Similarity
|
293
|
+
|
294
|
+
Checks to see how many words in a string match another. Words must match exactly, including case. The result is the percentage of words that have an exact pair. The number of occurrences is also a factor.
|
295
|
+
|
296
|
+
```ruby
|
297
|
+
'Learn Ruby, it is great'.phrase_similarity 'Learn Rails; it is awesome'
|
298
|
+
#=> 60.0
|
299
|
+
|
300
|
+
'ruby, ruby, ruby'.phrase_similarity 'ruby ruby'
|
301
|
+
#=> 66.66666666666666
|
302
|
+
```
|
303
|
+
|
304
|
+
4 - Numeric Similarity (In Progress)
|
305
|
+
|
306
|
+
This algorithm is currently undergoing refactoring...
|
307
|
+
|
308
|
+
5 - QWERTY Similarity
|
309
|
+
|
310
|
+
A basic method that compares two strings by measuring the physical difference from one char to another on a QWERTY keyboard (alpha-numeric only). May be useful for detecting typos in words, but becomes less useful depending on the length of the string. This method is still in development and not yet in a final state. Currently a total distance is returned. Eventually, a percentage based match will replace this.
|
311
|
+
|
312
|
+
```ruby
|
313
|
+
'q'.qwerty_distance 's'
|
314
|
+
#=> 2
|
315
|
+
|
316
|
+
'qwerty'.qwerty_distance 'qsertp'
|
317
|
+
#=> 5
|
318
|
+
```
|
319
|
+
|
320
|
+
#### Roman Numeral
|
321
|
+
|
322
|
+
**to_roman**
|
323
|
+
|
324
|
+
Converts an integer into a roman numeral. Supports numbers up to 1000 ('M'). Anything greater will simply return a string version of the integer. Can be called directly on any Fixnum object as well as from the BBLib module.
|
325
|
+
|
326
|
+
```ruby
|
327
|
+
BBLib.to_roman 20
|
328
|
+
#=> 'XX'
|
329
|
+
|
330
|
+
15.to_roman
|
331
|
+
#=> 'XV'
|
332
|
+
```
|
333
|
+
|
334
|
+
**string_to_roman**
|
335
|
+
|
336
|
+
Converts any integers found in a string into their roman numeral equivalent. Numbers will only be converted if they are surrounded by white space or by symbols. If the integer is embedded within alpha characters or contains a decimal, it is left untouched.
|
337
|
+
|
338
|
+
The method is also extended to the String class to be called directly.
|
339
|
+
|
340
|
+
```ruby
|
341
|
+
BBLib.string_to_roman "Toy Story 3"
|
342
|
+
#=> "Toy Story III"
|
343
|
+
|
344
|
+
"Die Hard 2: Die Harder".to_roman
|
345
|
+
#=> "Die Hard II: Die Harder"
|
346
|
+
|
347
|
+
"Left4Dead".to_roman
|
348
|
+
#=> "Left4Dead"
|
349
|
+
|
350
|
+
"Ruby 2.2".to_roman
|
351
|
+
#=> "Ruby 2.2"
|
352
|
+
```
|
353
|
+
|
354
|
+
**from_roman**
|
355
|
+
|
356
|
+
The opposite of _string_to_roman_. Parses a string for roman numerals and converts them into integers. Also extended to the String class to call directly. Works similarly to _to_roman_ in that numerals are converted only if surrounded by white space or symbols.
|
357
|
+
|
358
|
+
```ruby
|
359
|
+
BBLib.from_roman "Toy Story III"
|
360
|
+
#=> 'Toy Story 3'
|
361
|
+
|
362
|
+
"Super Mario Land II: Six Golden Coins".from_roman
|
363
|
+
#=> 'Super Mario Land 2: Six Golden Coins'
|
364
|
+
|
365
|
+
"Donkey Kong CountryIII".from_roman
|
366
|
+
#=> 'Donkey Kong CountryIII'
|
367
|
+
```
|
368
|
+
|
369
|
+
|
370
|
+
#### Other
|
371
|
+
|
372
|
+
**msplit** _aka multi split_
|
373
|
+
|
374
|
+
_msplit_ is similar to the String method split, except it can take an array of string delimiters rather than a single delim. The string is split by each delimiter in order and an Array is returned.
|
375
|
+
|
376
|
+
```ruby
|
377
|
+
"This_is.a&&&&test".msplit ['_', '.', '&']
|
378
|
+
|
379
|
+
#=> ['This', 'is', 'a', 'test']
|
380
|
+
```
|
381
|
+
|
382
|
+
By default any empty items from the return Array are removed. This behavior can be changed using the _:keep_empty_ named param.
|
383
|
+
|
384
|
+
```ruby
|
385
|
+
"This_is.a&&&&test".msplit ['_', '.', '&'], keep_empty: true
|
386
|
+
|
387
|
+
#=> ['This', 'is', 'a', '', '', '', 'test']
|
388
|
+
```
|
389
|
+
|
390
|
+
_msplit is only available directly from an instantiated String object._
|
391
|
+
|
392
|
+
**move_articles**
|
393
|
+
|
394
|
+
This method is used to normalize strings that contain titles. It parses a string and checks to see if _the_, _an_ or _a_ are in the title, either preceding or trailing. If they are found they are moved to the front, back or removed depending on the argument passed to _position_.
|
395
|
+
|
396
|
+
The method is available via the BBLib module or any instance of String.
|
397
|
+
|
398
|
+
```ruby
|
399
|
+
title = "The Simpsons"
|
400
|
+
title.move_articles :back
|
401
|
+
|
402
|
+
#=> "Simpsons, The"
|
403
|
+
|
404
|
+
title.move_articles :none
|
405
|
+
|
406
|
+
#=> "Simpsons"
|
407
|
+
|
408
|
+
title = "Day to Remember, A"
|
409
|
+
title.move_articles :front
|
410
|
+
|
411
|
+
#=> "A Day to Remember"
|
412
|
+
```
|
413
|
+
|
414
|
+
**drop_symbols**
|
415
|
+
|
416
|
+
A simple method to remove all non-alpha, non-numeric and non-whitespace characters from a string. Extended to the String class.
|
417
|
+
|
418
|
+
**extract_integers**
|
419
|
+
|
420
|
+
Returns an array of all integers found within a string. The named param _:convert_ can be set to true to convert the extracted numbers into Fixnums. If left false, strings are returned instead.
|
421
|
+
|
422
|
+
**extract_floats**
|
423
|
+
|
424
|
+
Performs the same action as _extract_integers_ except it can also pull floats from a string. The _:convert_ param is also available, but converts the strings into floats.
|
425
|
+
|
426
|
+
**extract_numbers**
|
427
|
+
|
428
|
+
See above. Is an alias for _extract_floats_.
|
429
|
+
|
430
|
+
|
195
431
|
|
196
432
|
### Time
|
197
433
|
|
434
|
+
#### Duration parser
|
435
|
+
|
436
|
+
**Parsing a duration from String**
|
437
|
+
|
438
|
+
Similar to the file size parser under the files section, but instead can parse duration from known time patterns in a string. By default the result is returned in seconds, but this can be changed using the named param _:output_. The method is also extended to the String class directly.
|
439
|
+
|
440
|
+
```ruby
|
441
|
+
"1hr 10 minutes 11s".parse_duration
|
442
|
+
|
443
|
+
#=> 4211.0
|
444
|
+
|
445
|
+
"1hr 10 minutes 11s".parse_duration output: :hour
|
446
|
+
|
447
|
+
#=> 1.1697222222222223
|
448
|
+
```
|
449
|
+
Output options are:
|
450
|
+
* :milli
|
451
|
+
* :sec
|
452
|
+
* :min
|
453
|
+
* :hour
|
454
|
+
* :day
|
455
|
+
* :week
|
456
|
+
* :month
|
457
|
+
* :year
|
458
|
+
|
459
|
+
**Create a duration String from Numeric**
|
460
|
+
|
461
|
+
There is also a method to turn a Numeric object into a string representation of a duration. This method is extended to the Numeric class. An input may be specified to tell the method what the input number represents. The options for this are the same as the output options listed above. A stop can be added using any of those same options. This will prevent the string from containing anything below the specified time type. For instance, specifying _stop: :sec_ will prevent milliseconds from being included if there are any. There are also three options that can be passed to the _:style_ argument to change the output (options are _:full_, _:medium_ and _:short_).
|
462
|
+
|
463
|
+
```ruby
|
464
|
+
9645.to_duration
|
465
|
+
#=> '2 hrs 40 mins 45 secs'
|
466
|
+
|
467
|
+
101.to_duration input: :hour
|
468
|
+
#=> '4 days 5 hrs'
|
469
|
+
|
470
|
+
20.56.to_duration input: :hour, style: :full
|
471
|
+
#=> '20 hours 33 minutes 36 seconds'
|
472
|
+
|
473
|
+
20.56123.to_duration input: :hour, style: :medium, stop: :min
|
474
|
+
#=> '20 hrs 33 mins'
|
475
|
+
|
476
|
+
123124.to_duration( style: :short)
|
477
|
+
#=> '34h 12m 4s'
|
478
|
+
```
|
198
479
|
|
199
480
|
## Development
|
200
481
|
|
data/bblib.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
|
|
10
10
|
spec.email = ["d2sm10@hotmail.com"]
|
11
11
|
|
12
12
|
spec.summary = %q{A library containing many reusable, basic functions.}
|
13
|
-
spec.description = %q{
|
13
|
+
spec.description = %q{A library containing many reusable, basic functions.}
|
14
14
|
spec.homepage = "https://github.com/bblack16/bblib-ruby"
|
15
15
|
spec.license = "MIT"
|
16
16
|
|
data/lib/bblib/version.rb
CHANGED
data/lib/file/bbfile.rb
CHANGED
@@ -36,7 +36,7 @@ module BBLib
|
|
36
36
|
bytes = 0.0
|
37
37
|
FILE_SIZES.each do |k, v|
|
38
38
|
v[:exp].each do |e|
|
39
|
-
numbers = str.scan(/(?=\w|\D|\A)\d?\.?\d+[[:space:]]*#{e}(?=\W|\d|\z)/i)
|
39
|
+
numbers = str.scan(/(?=\w|\D|\A)\d?\.?\d+[[:space:]]*#{e}s?(?=\W|\d|\z)/i)
|
40
40
|
numbers.each{ |n| bytes+= n.to_f * v[:mult] }
|
41
41
|
end
|
42
42
|
end
|
@@ -45,14 +45,14 @@ module BBLib
|
|
45
45
|
|
46
46
|
FILE_SIZES = {
|
47
47
|
byte: { mult: 1, exp: ['b', 'byt', 'byte'] },
|
48
|
-
kilobyte: { mult: 1024, exp: ['kb', 'kilo', 'k', 'kbyte', '
|
49
|
-
megabyte: { mult: 1048576, exp: ['mb', 'mega', 'm', 'mib', 'mbyte', '
|
50
|
-
gigabyte: { mult: 1073741824, exp: ['gb', 'giga', 'g', 'gbyte', '
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
48
|
+
kilobyte: { mult: 1024, exp: ['kb', 'kilo', 'k', 'kbyte', 'kilobyte'] },
|
49
|
+
megabyte: { mult: 1048576, exp: ['mb', 'mega', 'm', 'mib', 'mbyte', 'megabyte'] },
|
50
|
+
gigabyte: { mult: 1073741824, exp: ['gb', 'giga', 'g', 'gbyte', 'gigabyte'] },
|
51
|
+
terabyte: { mult: 1099511627776, exp: ['tb', 'tera', 't', 'tbyte', 'terabyte'] },
|
52
|
+
petabyte: { mult: 1125899906842624, exp: ['pb', 'peta', 'p', 'pbyte', 'petabyte'] },
|
53
|
+
exabyte: { mult: 1152921504606846976, exp: ['eb', 'exa', 'e', 'ebyte', 'exabyte'] },
|
54
|
+
zettabyte: { mult: 1180591620717411303424, exp: ['zb', 'zetta', 'z', 'zbyte', 'zettabyte'] },
|
55
|
+
yottabyte: { mult: 1208925819614629174706176, exp: ['yb', 'yotta', 'y', 'ybyte', 'yottabyte'] }
|
56
56
|
}
|
57
57
|
|
58
58
|
end
|
data/lib/string/bbstring.rb
CHANGED
@@ -30,7 +30,7 @@ module BBLib
|
|
30
30
|
end
|
31
31
|
|
32
32
|
# Used to move the position of the articles 'the', 'a' and 'an' in strings for normalization.
|
33
|
-
def self.move_articles str, position = :front, capitalize
|
33
|
+
def self.move_articles str, position = :front, capitalize: true
|
34
34
|
return str unless [:front, :back, :none].include? position
|
35
35
|
articles = ["the", "a", "an"]
|
36
36
|
articles.each do |a|
|
data/lib/string/fuzzy_matcher.rb
CHANGED
@@ -58,8 +58,8 @@ module BBLib
|
|
58
58
|
levenshtein: {weight: 10, signature: :levenshtein_similarity},
|
59
59
|
composition: {weight: 5, signature: :composition_similarity},
|
60
60
|
numeric: {weight: 0, signature: :numeric_similarity},
|
61
|
-
phrase: {weight: 0, signature: :phrase_similarity}
|
62
|
-
qwerty: {weight: 0, signature: :qwerty_similarity}
|
61
|
+
phrase: {weight: 0, signature: :phrase_similarity}
|
62
|
+
# FUTURE qwerty: {weight: 0, signature: :qwerty_similarity}
|
63
63
|
}
|
64
64
|
|
65
65
|
def prep_strings a, b
|
data/lib/string/matching.rb
CHANGED
@@ -5,8 +5,7 @@
|
|
5
5
|
module BBLib
|
6
6
|
|
7
7
|
# A simple rendition of the levenshtein distance algorithm
|
8
|
-
def self.levenshtein_distance a, b
|
9
|
-
if !case_sensitive then a, b = a.downcase, b.downcase end
|
8
|
+
def self.levenshtein_distance a, b
|
10
9
|
costs = (0..b.length).to_a
|
11
10
|
(1..a.length).each do |i|
|
12
11
|
costs[0], nw = i, i - 1
|
@@ -18,15 +17,14 @@ module BBLib
|
|
18
17
|
end
|
19
18
|
|
20
19
|
# Calculates a percentage based match using the levenshtein distance algorithm
|
21
|
-
def self.levenshtein_similarity a, b
|
22
|
-
distance = BBLib.levenshtein_distance a, b
|
20
|
+
def self.levenshtein_similarity a, b
|
21
|
+
distance = BBLib.levenshtein_distance a, b
|
23
22
|
max = [a.length, b.length].max.to_f
|
24
23
|
return ((max - distance.to_f) / max) * 100.0
|
25
24
|
end
|
26
25
|
|
27
26
|
# Calculates a percentage based match of two strings based on their character composition.
|
28
|
-
def self.composition_similarity a, b
|
29
|
-
if !case_sensitive then a, b = a.downcase, b.downcase end
|
27
|
+
def self.composition_similarity a, b
|
30
28
|
if a.length <= b.length then t = a; a = b; b = t; end
|
31
29
|
matches, temp = 0, b
|
32
30
|
a.chars.each do |c|
|
@@ -39,11 +37,10 @@ module BBLib
|
|
39
37
|
end
|
40
38
|
|
41
39
|
# Calculates a percentage based match between two strings based on the similarity of word matches.
|
42
|
-
def self.phrase_similarity a, b
|
43
|
-
|
44
|
-
temp = b.split ' '
|
40
|
+
def self.phrase_similarity a, b
|
41
|
+
temp = b.drop_symbols.split ' '
|
45
42
|
matches = 0
|
46
|
-
a.split(' ').each do |w|
|
43
|
+
a.drop_symbols.split(' ').each do |w|
|
47
44
|
if temp.include? w
|
48
45
|
matches+=1
|
49
46
|
temp.delete_at temp.find_index w
|
@@ -54,8 +51,7 @@ module BBLib
|
|
54
51
|
|
55
52
|
# Extracts all numbers from two strings and compares them and generates a percentage of match.
|
56
53
|
# Percentage calculations here need to be weighted better...TODO
|
57
|
-
def self.numeric_similarity a, b
|
58
|
-
if !case_sensitive then a, b = a.downcase, b.downcase end
|
54
|
+
def self.numeric_similarity a, b
|
59
55
|
a, b = a.extract_numbers, b.extract_numbers
|
60
56
|
return 100.0 if a.empty? && b.empty?
|
61
57
|
matches = []
|
@@ -67,7 +63,7 @@ module BBLib
|
|
67
63
|
|
68
64
|
# A simple character distance calculator that uses qwerty key positions to determine how similar two strings are.
|
69
65
|
# May be useful for typo detection.
|
70
|
-
def self.
|
66
|
+
def self.qwerty_distance a, b
|
71
67
|
a, b = a.downcase.strip, b.downcase.strip
|
72
68
|
if a.length <= b.length then t = a; a = b; b = t; end
|
73
69
|
qwerty = {
|
@@ -93,27 +89,27 @@ module BBLib
|
|
93
89
|
end
|
94
90
|
|
95
91
|
class String
|
96
|
-
def levenshtein_distance str
|
97
|
-
BBLib.levenshtein_distance self, str
|
92
|
+
def levenshtein_distance str
|
93
|
+
BBLib.levenshtein_distance self, str
|
98
94
|
end
|
99
95
|
|
100
|
-
def levenshtein_similarity str
|
101
|
-
BBLib.levenshtein_similarity self, str
|
96
|
+
def levenshtein_similarity str
|
97
|
+
BBLib.levenshtein_similarity self, str
|
102
98
|
end
|
103
99
|
|
104
|
-
def composition_similarity str
|
105
|
-
BBLib.composition_similarity self, str
|
100
|
+
def composition_similarity str
|
101
|
+
BBLib.composition_similarity self, str
|
106
102
|
end
|
107
103
|
|
108
|
-
def phrase_similarity str
|
109
|
-
BBLib.phrase_similarity self, str
|
104
|
+
def phrase_similarity str
|
105
|
+
BBLib.phrase_similarity self, str
|
110
106
|
end
|
111
107
|
|
112
|
-
def numeric_similarity str
|
113
|
-
BBLib.numeric_similarity self, str
|
108
|
+
def numeric_similarity str
|
109
|
+
BBLib.numeric_similarity self, str
|
114
110
|
end
|
115
111
|
|
116
|
-
def
|
117
|
-
BBLib.
|
112
|
+
def qwerty_distance str
|
113
|
+
BBLib.qwerty_distance self, str
|
118
114
|
end
|
119
115
|
end
|
data/lib/string/roman.rb
CHANGED
@@ -3,11 +3,12 @@ module BBLib
|
|
3
3
|
|
4
4
|
# Converts any integer up to 1000 to a roman numeral string
|
5
5
|
def self.to_roman num
|
6
|
+
return num.to_s if num > 1000
|
6
7
|
roman = {1000 => 'M', 900 => 'CM', 500 => 'D', 400 => 'CD', 100 => 'C', 90 => 'XC', 50 => 'L',
|
7
8
|
40 => 'XL', 10 => 'X', 9 => 'IX', 5 => 'V', 4 => 'IV', 3 => 'III', 2 => 'II', 1 => 'I'}
|
8
9
|
numeral = ""
|
9
10
|
roman.each do |n, r|
|
10
|
-
|
11
|
+
while num >= n
|
11
12
|
num-= n
|
12
13
|
numeral+= r
|
13
14
|
end
|
@@ -18,8 +19,8 @@ module BBLib
|
|
18
19
|
def self.string_to_roman str
|
19
20
|
sp = str.split ' '
|
20
21
|
sp.map! do |s|
|
21
|
-
if s.to_i.to_s == s
|
22
|
-
BBLib.to_roman
|
22
|
+
if s.drop_symbols.to_i.to_s == s.drop_symbols && !(s =~ /\d+\.\d+/)
|
23
|
+
s.sub!(s.scan(/\d+/).first.to_s, BBLib.to_roman(s.to_i))
|
23
24
|
else
|
24
25
|
s
|
25
26
|
end
|
@@ -34,8 +35,8 @@ module BBLib
|
|
34
35
|
num = BBLib.to_roman n
|
35
36
|
if !sp.select{ |i| i[/#{num}/i]}.empty?
|
36
37
|
for i in 0..(sp.length-1)
|
37
|
-
if sp[i].upcase == num
|
38
|
-
sp[i]
|
38
|
+
if sp[i].drop_symbols.upcase == num
|
39
|
+
sp[i].sub!(num ,n.to_s)
|
39
40
|
end
|
40
41
|
end
|
41
42
|
end
|
@@ -45,7 +46,7 @@ module BBLib
|
|
45
46
|
|
46
47
|
end
|
47
48
|
|
48
|
-
class
|
49
|
+
class Fixnum
|
49
50
|
def to_roman
|
50
51
|
BBLib.to_roman self.to_i
|
51
52
|
end
|
data/lib/time/bbtime.rb
CHANGED
@@ -17,7 +17,7 @@ module BBLib
|
|
17
17
|
end
|
18
18
|
|
19
19
|
# Turns a numeric input into a time string.
|
20
|
-
def self.to_duration num, input: :sec, stop: :
|
20
|
+
def self.to_duration num, input: :sec, stop: :milli, style: :medium
|
21
21
|
return nil unless Numeric === num || num > 0
|
22
22
|
if ![:full, :medium, :short].include?(style) then style = :medium end
|
23
23
|
expression = []
|
@@ -26,7 +26,7 @@ module BBLib
|
|
26
26
|
next unless !done
|
27
27
|
div = n / v[:mult]
|
28
28
|
if div > 1
|
29
|
-
expression << "#{div.floor}
|
29
|
+
expression << "#{div.floor}#{v[:styles][style]}#{div.floor > 1 && style != :short ? "s" : nil}"
|
30
30
|
n-= div.floor * v[:mult]
|
31
31
|
end
|
32
32
|
if k == stop then done = true end
|
@@ -35,50 +35,50 @@ module BBLib
|
|
35
35
|
end
|
36
36
|
|
37
37
|
TIME_EXPS = {
|
38
|
-
|
38
|
+
milli: {
|
39
39
|
mult: 0.001,
|
40
|
-
styles: {full: '
|
41
|
-
exp: ['ms', 'mil', 'mils', '
|
40
|
+
styles: {full: ' millisecond', medium: ' milli', short: 'ms'},
|
41
|
+
exp: ['ms', 'mil', 'mils', 'milli', 'millis', 'millisecond', 'milliseconds', 'milsec', 'milsecs', 'msec', 'msecs', 'msecond', 'mseconds']},
|
42
42
|
sec: {
|
43
43
|
mult: 1,
|
44
|
-
styles: {full: 'second', medium: 'sec', short: 's'},
|
44
|
+
styles: {full: ' second', medium: ' sec', short: 's'},
|
45
45
|
exp: ['s', 'sec', 'secs', 'second', 'seconds']},
|
46
46
|
min: {
|
47
47
|
mult: 60,
|
48
|
-
styles: {full: 'minute', medium: 'min', short: 'm'},
|
48
|
+
styles: {full: ' minute', medium: ' min', short: 'm'},
|
49
49
|
exp: ['m', 'mn', 'mns', 'min', 'mins', 'minute', 'minutes']},
|
50
50
|
hour: {
|
51
51
|
mult: 3600,
|
52
|
-
styles: {full: 'hour', medium: 'hr', short: 'h'},
|
52
|
+
styles: {full: ' hour', medium: ' hr', short: 'h'},
|
53
53
|
exp: ['h', 'hr', 'hrs', 'hour', 'hours']},
|
54
54
|
day: {
|
55
55
|
mult: 86400,
|
56
|
-
styles: {full: 'day', medium: 'day', short: 'd'},
|
56
|
+
styles: {full: ' day', medium: ' day', short: 'd'},
|
57
57
|
exp: ['d', 'day' 'days']},
|
58
58
|
week: {
|
59
59
|
mult: 604800,
|
60
|
-
styles: {full: 'week', medium: 'wk', short: 'w'},
|
60
|
+
styles: {full: ' week', medium: ' wk', short: 'w'},
|
61
61
|
exp: ['w', 'wk', 'wks', 'week', 'weeks']},
|
62
62
|
month: {
|
63
63
|
mult: 2592000,
|
64
|
-
styles: {full: 'month', medium: 'mo', short: 'mo'},
|
64
|
+
styles: {full: ' month', medium: ' mo', short: 'mo'},
|
65
65
|
exp: ['mo', 'mon', 'mons', 'month', 'months', 'mnth', 'mnths', 'mth', 'mths']},
|
66
66
|
year: {
|
67
67
|
mult: 31536000,
|
68
|
-
styles: {full: 'year', medium: 'yr', short: 'y'},
|
68
|
+
styles: {full: ' year', medium: ' yr', short: 'y'},
|
69
69
|
exp: ['y', 'yr', 'yrs', 'year', 'years']}
|
70
70
|
}
|
71
71
|
|
72
72
|
end
|
73
73
|
|
74
74
|
class String
|
75
|
-
def parse_duration
|
76
|
-
BBLib.parse_duration self,
|
75
|
+
def parse_duration output: :sec
|
76
|
+
BBLib.parse_duration self, output:output
|
77
77
|
end
|
78
78
|
end
|
79
79
|
|
80
80
|
class Numeric
|
81
|
-
def to_duration input: :sec, stop: :
|
81
|
+
def to_duration input: :sec, stop: :milli, style: :medium
|
82
82
|
BBLib.to_duration self, input: input, stop: stop, style: style
|
83
83
|
end
|
84
84
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bblib
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brandon Black
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-01-
|
11
|
+
date: 2016-01-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,7 +52,7 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
-
description:
|
55
|
+
description: A library containing many reusable, basic functions.
|
56
56
|
email:
|
57
57
|
- d2sm10@hotmail.com
|
58
58
|
executables: []
|