scorer 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,82 @@
1
module Scorer
  # Scores how well +abbreviation+ matches the receiver. Characters of
  # +abbreviation+ must occur in the receiver in order (case-insensitively);
  # bonuses are added for matching case, for hitting the first character of
  # the not-yet-consumed remainder, and for hitting a character that directly
  # follows a space.
  #
  # This module is written to be mixed into String: it relies on the receiver
  # responding to +length+, +empty?+, +index+ and +[]+.
  #
  # @param abbreviation [String] the (possibly abbreviated) text to look for
  # @param fuzziness [Numeric, nil] when nil, any character that cannot be
  #   found makes the whole score 0. Otherwise each unmatched character adds
  #   <tt>1 - fuzziness</tt> to a divisor applied to the final score.
  # @return [Numeric] 1 for an exact match, 0 for no match (or when either
  #   side is empty), otherwise a Float
  def score(abbreviation, fuzziness = nil)
    # If the string is equal to the abbreviation, perfect match.
    return 1 if self == abbreviation

    # Nothing to match with, or nothing to match against. Guarding here avoids
    # dividing by a zero length further down.
    return 0 if abbreviation.empty? || empty?

    remaining = self
    string_length = length.to_f
    total_character_score = 0.0
    should_award_common_prefix_bonus = false
    fuzzies = 1

    # Walk through abbreviation and add up scores.
    abbreviation.chars.each_with_index do |c, i|
      # Earliest position of the current character, in either case, within
      # the part of the string that has not been consumed yet.
      index_in_string = [remaining.index(c.downcase), remaining.index(c.upcase)].compact.min

      unless index_in_string
        return 0 unless fuzziness

        fuzzies += 1 - fuzziness
        next
      end

      # Base score for a matched character.
      character_score = 0.1

      # Same case bonus.
      character_score += 0.1 if remaining[index_in_string, 1] == c

      if index_in_string.zero?
        # Consecutive letter & start-of-string bonus: the match is the first
        # character of the remainder.
        character_score += 0.6

        # First character of the string matched by the first character of
        # the abbreviation: remember to award the common prefix bonus.
        should_award_common_prefix_bonus = true if i.zero?
      elsif remaining[index_in_string - 1, 1] == ' '
        # Acronym bonus: the matched character directly follows a space.
        # Only checked when there is a preceding character, because index -1
        # would wrap around to the last character of the remainder.
        character_score += 0.8
      end

      # Left trim the matched part of the string (forces sequential matching).
      remaining = remaining[(index_in_string + 1)..-1]

      total_character_score += character_score
    end

    abbreviation_score = total_character_score / abbreviation.length

    # Reduce penalty for longer strings.
    percentage_of_matched_string = abbreviation.length / string_length
    word_score = abbreviation_score * percentage_of_matched_string
    final_score = (word_score + abbreviation_score) / 2.0

    # Penalize any fuzzies.
    final_score /= fuzzies

    # Award common prefix bonus if it does not push the score to 1 or above.
    final_score += 0.15 if should_award_common_prefix_bonus && (final_score + 0.15 < 1)

    final_score
  end
end
79
+
80
# Mix Scorer into String so that every string responds to #score.
String.send(:include, Scorer)
@@ -0,0 +1,3 @@
1
module Scorer
  # Gem version string. Frozen so the shared constant cannot be mutated in
  # place by accident.
  VERSION = '1.0.0'.freeze

  # CamelCase alias referring to the very same object as VERSION.
  Version = VERSION
end
@@ -0,0 +1,81 @@
1
+ require 'spec_helper'
2
+
3
# Examples describing the relative ordering of String#score results.
describe Scorer do
  describe "score" do
    it "returns a perfect score for identical strings" do
      phrase = "Hello World"
      phrase.score("Hello World").should == 1
    end

    it "returns no score for abbreviations with inexistant characters" do
      phrase = "hello world"
      ["hellx", "hello_world"].each do |abbreviation|
        phrase.score(abbreviation).should == 0
      end
    end

    it "returns no score for a non-sequential match" do
      phrase = "Hello World"
      phrase.score("WH").should == 0
    end

    it "returns higher score for same case than for wrong case" do
      phrase = "Hello World"
      phrase.score("hello").should be < phrase.score("Hello")
    end

    it "returns higher score for closer matches" do
      phrase = "Hello World"
      phrase.score("H").should be < phrase.score("He")
    end

    it "returns a score even with wrong casing" do
      phrase = "Hillsdale Michigan"
      phrase.score("himi").should be > 0
    end

    it "returns a higher score for closer matches" do
      phrase = "hello world"
      # Each pair is [weaker abbreviation, stronger abbreviation].
      [
        ["e", "h"],
        ["h", "he"],
        ["hel", "hell"],
        ["hell", "hello"],
        ["hello", "helloworld"],
        ["helloworl", "hello worl"],
        ["hello worl", "hello world"]
      ].each do |weaker, stronger|
        phrase.score(weaker).should be < phrase.score(stronger)
      end
    end

    it "gives a bonus for consecutive matches" do
      phrase = "Hello World"
      phrase.score("Hel").should be > phrase.score("Hld")
    end

    it "gives a bonus for acronyms" do
      greeting = "Hello World"
      greeting.score("HW").should be > greeting.score("Ho")
      "yet another Hello World".score("yaHW").should be > greeting.score("yet another")

      place = "Hillsdale Michigan"
      ["Hil", "illsda", "hills"].each do |abbreviation|
        place.score("HiMi").should be > place.score(abbreviation)
      end
      place.score("HiMi").should be < place.score("hillsd")
    end

    it "gives a bonus for matching beginning of string" do
      "Hillsdale".score("hi").should be > "Chippewa".score("hi")

      phrase = "hello world"
      phrase.score("h").should be > phrase.score("w")
    end

    it "weighs strings properly" do
      ["Mary Conces", "Bonnie Strathern - Southwest Michigan Title Search"].each do |other|
        "Research Resources North".score("res").should be > other.score('res')
      end
    end

    describe "fuzzy scoring" do
      it "scores fuzzy lower than non-fuzzy" do
        phrase = "Hello World"
        phrase.score("Hz", 0.5).should be < phrase.score("H", 0.5)
      end

      it "scores mismatch lower" do
        phrase = "hello world"
        phrase.score("hello worl", 0.5).should be > phrase.score("hello wor1", 0.5)
      end

      it "returns fuzzy scores" do
        phrase = "Hello World"
        phrase.score("jello", 0.5).should be > 0
      end

      it "returns higher scores for higher fuzziness" do
        phrase = "Hello World"
        phrase.score("Hz", 0.9).should be > phrase.score("Hz", 0.5)
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ require 'scorer'
@@ -0,0 +1,26 @@
1
+ require 'benchmark'
2
+ require 'scorer'
3
+
4
# Number of #score calls timed per report.
iterations = 4000
puts "Benchmarking #{iterations} iterations..."

# Each entry is [report label, string being scored, abbreviation].
# The fixtures are built once here so that the timed loops below measure
# only the #score calls, not the allocation of fresh string literals.
cases = [
  ["one letter", "hello world", "h"],
  ["two letters", "hello world", "hw"],
  ["full match", "hello world", "hello world"],
  ["13 char / 32 char", "hello any world that will listen", "hlo wrdthlstn"],
  [
    "70 char / 446 char",
    "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.",
    "Lorem i dor coecadipg et, Duis aute irure dole nulla. qui ofa mot am l"
  ]
]

# bmbm runs a rehearsal pass before the measured pass.
Benchmark.bmbm do |x|
  cases.each do |label, string, abbreviation|
    x.report(label) do
      iterations.times { string.score(abbreviation) }
    end
  end
end
@@ -0,0 +1,816 @@
1
+ !RBIX
2
+ 10937318184790222022
3
+ x
4
+ M
5
+ 1
6
+ n
7
+ n
8
+ x
9
+ 10
10
+ __script__
11
+ i
12
+ 51
13
+ 5
14
+ 7
15
+ 0
16
+ 64
17
+ 47
18
+ 49
19
+ 1
20
+ 1
21
+ 15
22
+ 5
23
+ 7
24
+ 2
25
+ 64
26
+ 47
27
+ 49
28
+ 1
29
+ 1
30
+ 15
31
+ 7
32
+ 3
33
+ 19
34
+ 0
35
+ 15
36
+ 5
37
+ 7
38
+ 4
39
+ 20
40
+ 0
41
+ 47
42
+ 101
43
+ 5
44
+ 7
45
+ 6
46
+ 63
47
+ 3
48
+ 47
49
+ 49
50
+ 7
51
+ 1
52
+ 15
53
+ 45
54
+ 8
55
+ 9
56
+ 56
57
+ 10
58
+ 50
59
+ 11
60
+ 0
61
+ 15
62
+ 2
63
+ 11
64
+ I
65
+ 5
66
+ I
67
+ 1
68
+ I
69
+ 0
70
+ I
71
+ 0
72
+ n
73
+ p
74
+ 12
75
+ s
76
+ 9
77
+ benchmark
78
+ x
79
+ 7
80
+ require
81
+ s
82
+ 5
83
+ score
84
+ I
85
+ fa0
86
+ s
87
+ 13
88
+ Benchmarking
89
+ x
90
+ 4
91
+ to_s
92
+ s
93
+ 14
94
+ iterations...
95
+ x
96
+ 4
97
+ puts
98
+ x
99
+ 9
100
+ Benchmark
101
+ n
102
+ M
103
+ 1
104
+ p
105
+ 2
106
+ x
107
+ 9
108
+ for_block
109
+ t
110
+ n
111
+ x
112
+ 9
113
+ __block__
114
+ i
115
+ 59
116
+ 57
117
+ 19
118
+ 0
119
+ 15
120
+ 20
121
+ 0
122
+ 7
123
+ 0
124
+ 64
125
+ 56
126
+ 1
127
+ 50
128
+ 2
129
+ 1
130
+ 15
131
+ 20
132
+ 0
133
+ 7
134
+ 3
135
+ 64
136
+ 56
137
+ 4
138
+ 50
139
+ 2
140
+ 1
141
+ 15
142
+ 20
143
+ 0
144
+ 7
145
+ 5
146
+ 64
147
+ 56
148
+ 6
149
+ 50
150
+ 2
151
+ 1
152
+ 15
153
+ 20
154
+ 0
155
+ 7
156
+ 7
157
+ 64
158
+ 56
159
+ 8
160
+ 50
161
+ 2
162
+ 1
163
+ 15
164
+ 20
165
+ 0
166
+ 7
167
+ 9
168
+ 64
169
+ 56
170
+ 10
171
+ 50
172
+ 2
173
+ 1
174
+ 11
175
+ I
176
+ 5
177
+ I
178
+ 1
179
+ I
180
+ 1
181
+ I
182
+ 1
183
+ n
184
+ p
185
+ 11
186
+ s
187
+ 10
188
+ one letter
189
+ M
190
+ 1
191
+ p
192
+ 2
193
+ x
194
+ 9
195
+ for_block
196
+ t
197
+ n
198
+ x
199
+ 9
200
+ __block__
201
+ i
202
+ 9
203
+ 21
204
+ 2
205
+ 0
206
+ 56
207
+ 0
208
+ 50
209
+ 1
210
+ 0
211
+ 11
212
+ I
213
+ 3
214
+ I
215
+ 0
216
+ I
217
+ 0
218
+ I
219
+ 0
220
+ I
221
+ -2
222
+ p
223
+ 2
224
+ M
225
+ 1
226
+ p
227
+ 2
228
+ x
229
+ 9
230
+ for_block
231
+ t
232
+ n
233
+ x
234
+ 9
235
+ __block__
236
+ i
237
+ 10
238
+ 7
239
+ 0
240
+ 64
241
+ 7
242
+ 1
243
+ 64
244
+ 49
245
+ 2
246
+ 1
247
+ 11
248
+ I
249
+ 3
250
+ I
251
+ 0
252
+ I
253
+ 0
254
+ I
255
+ 0
256
+ I
257
+ -2
258
+ p
259
+ 3
260
+ s
261
+ 11
262
+ hello world
263
+ s
264
+ 1
265
+ h
266
+ x
267
+ 5
268
+ score
269
+ p
270
+ 3
271
+ I
272
+ 0
273
+ I
274
+ 8
275
+ I
276
+ a
277
+ x
278
+ 39
279
+ /Users/matt/src/score/test/benchmark.rb
280
+ p
281
+ 0
282
+ x
283
+ 5
284
+ times
285
+ p
286
+ 3
287
+ I
288
+ 0
289
+ I
290
+ 8
291
+ I
292
+ 9
293
+ x
294
+ 39
295
+ /Users/matt/src/score/test/benchmark.rb
296
+ p
297
+ 0
298
+ x
299
+ 6
300
+ report
301
+ s
302
+ 11
303
+ two letters
304
+ M
305
+ 1
306
+ p
307
+ 2
308
+ x
309
+ 9
310
+ for_block
311
+ t
312
+ n
313
+ x
314
+ 9
315
+ __block__
316
+ i
317
+ 9
318
+ 21
319
+ 2
320
+ 0
321
+ 56
322
+ 0
323
+ 50
324
+ 1
325
+ 0
326
+ 11
327
+ I
328
+ 3
329
+ I
330
+ 0
331
+ I
332
+ 0
333
+ I
334
+ 0
335
+ I
336
+ -2
337
+ p
338
+ 2
339
+ M
340
+ 1
341
+ p
342
+ 2
343
+ x
344
+ 9
345
+ for_block
346
+ t
347
+ n
348
+ x
349
+ 9
350
+ __block__
351
+ i
352
+ 10
353
+ 7
354
+ 0
355
+ 64
356
+ 7
357
+ 1
358
+ 64
359
+ 49
360
+ 2
361
+ 1
362
+ 11
363
+ I
364
+ 3
365
+ I
366
+ 0
367
+ I
368
+ 0
369
+ I
370
+ 0
371
+ I
372
+ -2
373
+ p
374
+ 3
375
+ s
376
+ 11
377
+ hello world
378
+ s
379
+ 2
380
+ hw
381
+ x
382
+ 5
383
+ score
384
+ p
385
+ 3
386
+ I
387
+ 0
388
+ I
389
+ b
390
+ I
391
+ a
392
+ x
393
+ 39
394
+ /Users/matt/src/score/test/benchmark.rb
395
+ p
396
+ 0
397
+ x
398
+ 5
399
+ times
400
+ p
401
+ 3
402
+ I
403
+ 0
404
+ I
405
+ b
406
+ I
407
+ 9
408
+ x
409
+ 39
410
+ /Users/matt/src/score/test/benchmark.rb
411
+ p
412
+ 0
413
+ s
414
+ 10
415
+ full match
416
+ M
417
+ 1
418
+ p
419
+ 2
420
+ x
421
+ 9
422
+ for_block
423
+ t
424
+ n
425
+ x
426
+ 9
427
+ __block__
428
+ i
429
+ 9
430
+ 21
431
+ 2
432
+ 0
433
+ 56
434
+ 0
435
+ 50
436
+ 1
437
+ 0
438
+ 11
439
+ I
440
+ 3
441
+ I
442
+ 0
443
+ I
444
+ 0
445
+ I
446
+ 0
447
+ I
448
+ -2
449
+ p
450
+ 2
451
+ M
452
+ 1
453
+ p
454
+ 2
455
+ x
456
+ 9
457
+ for_block
458
+ t
459
+ n
460
+ x
461
+ 9
462
+ __block__
463
+ i
464
+ 10
465
+ 7
466
+ 0
467
+ 64
468
+ 7
469
+ 0
470
+ 64
471
+ 49
472
+ 1
473
+ 1
474
+ 11
475
+ I
476
+ 3
477
+ I
478
+ 0
479
+ I
480
+ 0
481
+ I
482
+ 0
483
+ I
484
+ -2
485
+ p
486
+ 2
487
+ s
488
+ 11
489
+ hello world
490
+ x
491
+ 5
492
+ score
493
+ p
494
+ 3
495
+ I
496
+ 0
497
+ I
498
+ e
499
+ I
500
+ a
501
+ x
502
+ 39
503
+ /Users/matt/src/score/test/benchmark.rb
504
+ p
505
+ 0
506
+ x
507
+ 5
508
+ times
509
+ p
510
+ 3
511
+ I
512
+ 0
513
+ I
514
+ e
515
+ I
516
+ 9
517
+ x
518
+ 39
519
+ /Users/matt/src/score/test/benchmark.rb
520
+ p
521
+ 0
522
+ s
523
+ 17
524
+ 13 char / 32 char
525
+ M
526
+ 1
527
+ p
528
+ 2
529
+ x
530
+ 9
531
+ for_block
532
+ t
533
+ n
534
+ x
535
+ 9
536
+ __block__
537
+ i
538
+ 9
539
+ 21
540
+ 2
541
+ 0
542
+ 56
543
+ 0
544
+ 50
545
+ 1
546
+ 0
547
+ 11
548
+ I
549
+ 3
550
+ I
551
+ 0
552
+ I
553
+ 0
554
+ I
555
+ 0
556
+ I
557
+ -2
558
+ p
559
+ 2
560
+ M
561
+ 1
562
+ p
563
+ 2
564
+ x
565
+ 9
566
+ for_block
567
+ t
568
+ n
569
+ x
570
+ 9
571
+ __block__
572
+ i
573
+ 10
574
+ 7
575
+ 0
576
+ 64
577
+ 7
578
+ 1
579
+ 64
580
+ 49
581
+ 2
582
+ 1
583
+ 11
584
+ I
585
+ 3
586
+ I
587
+ 0
588
+ I
589
+ 0
590
+ I
591
+ 0
592
+ I
593
+ -2
594
+ p
595
+ 3
596
+ s
597
+ 32
598
+ hello any world that will listen
599
+ s
600
+ 13
601
+ hlo wrdthlstn
602
+ x
603
+ 5
604
+ score
605
+ p
606
+ 3
607
+ I
608
+ 0
609
+ I
610
+ 12
611
+ I
612
+ a
613
+ x
614
+ 39
615
+ /Users/matt/src/score/test/benchmark.rb
616
+ p
617
+ 0
618
+ x
619
+ 5
620
+ times
621
+ p
622
+ 3
623
+ I
624
+ 0
625
+ I
626
+ 11
627
+ I
628
+ 9
629
+ x
630
+ 39
631
+ /Users/matt/src/score/test/benchmark.rb
632
+ p
633
+ 0
634
+ s
635
+ 18
636
+ 70 char / 446 char
637
+ M
638
+ 1
639
+ p
640
+ 2
641
+ x
642
+ 9
643
+ for_block
644
+ t
645
+ n
646
+ x
647
+ 9
648
+ __block__
649
+ i
650
+ 9
651
+ 21
652
+ 2
653
+ 0
654
+ 56
655
+ 0
656
+ 50
657
+ 1
658
+ 0
659
+ 11
660
+ I
661
+ 3
662
+ I
663
+ 0
664
+ I
665
+ 0
666
+ I
667
+ 0
668
+ I
669
+ -2
670
+ p
671
+ 2
672
+ M
673
+ 1
674
+ p
675
+ 2
676
+ x
677
+ 9
678
+ for_block
679
+ t
680
+ n
681
+ x
682
+ 9
683
+ __block__
684
+ i
685
+ 10
686
+ 7
687
+ 0
688
+ 64
689
+ 7
690
+ 1
691
+ 64
692
+ 49
693
+ 2
694
+ 1
695
+ 11
696
+ I
697
+ 3
698
+ I
699
+ 0
700
+ I
701
+ 0
702
+ I
703
+ 0
704
+ I
705
+ -2
706
+ p
707
+ 3
708
+ s
709
+ 446
710
+ Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
711
+ s
712
+ 70
713
+ Lorem i dor coecadipg et, Duis aute irure dole nulla. qui ofa mot am l
714
+ x
715
+ 5
716
+ score
717
+ p
718
+ 3
719
+ I
720
+ 0
721
+ I
722
+ 17
723
+ I
724
+ a
725
+ x
726
+ 39
727
+ /Users/matt/src/score/test/benchmark.rb
728
+ p
729
+ 0
730
+ x
731
+ 5
732
+ times
733
+ p
734
+ 3
735
+ I
736
+ 0
737
+ I
738
+ 16
739
+ I
740
+ 9
741
+ x
742
+ 39
743
+ /Users/matt/src/score/test/benchmark.rb
744
+ p
745
+ 0
746
+ p
747
+ 13
748
+ I
749
+ 0
750
+ I
751
+ 6
752
+ I
753
+ 4
754
+ I
755
+ 7
756
+ I
757
+ f
758
+ I
759
+ a
760
+ I
761
+ 1a
762
+ I
763
+ d
764
+ I
765
+ 25
766
+ I
767
+ 10
768
+ I
769
+ 30
770
+ I
771
+ 15
772
+ I
773
+ 3b
774
+ x
775
+ 39
776
+ /Users/matt/src/score/test/benchmark.rb
777
+ p
778
+ 1
779
+ x
780
+ 1
781
+ x
782
+ x
783
+ 4
784
+ bmbm
785
+ p
786
+ 11
787
+ I
788
+ 0
789
+ I
790
+ 1
791
+ I
792
+ 9
793
+ I
794
+ 2
795
+ I
796
+ 12
797
+ I
798
+ 4
799
+ I
800
+ 17
801
+ I
802
+ 5
803
+ I
804
+ 28
805
+ I
806
+ 6
807
+ I
808
+ 33
809
+ x
810
+ 39
811
+ /Users/matt/src/score/test/benchmark.rb
812
+ p
813
+ 1
814
+ x
815
+ 10
816
+ iterations