mini_search 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup" # let Bundler set up the load path before the gem is required
4
+ require "mini_search"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__) # open an IRB session with mini_search already required
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail # abort on any error, unset variable, or failed pipeline stage
3
+ IFS=$'\n\t' # word-split on newlines and tabs only, never on spaces
4
+ set -vx # from here on, echo script lines as read and commands as executed
5
+
6
+ bundle install # install the project's gem dependencies via Bundler
7
+
8
+ # Do any other automated setup that you need to do here
data/formula1.svg ADDED
@@ -0,0 +1,144 @@
1
+ <svg xmlns:xlink="http://www.w3.org/1999/xlink" width="62.123ex" height="8.343ex" style="vertical-align: -4.505ex;" viewBox="0 -1652.5 26747.2 3591.9" role="img" focusable="false" xmlns="http://www.w3.org/2000/svg" aria-labelledby="MathJax-SVG-1-Title">
2
+ <title id="MathJax-SVG-1-Title">{\displaystyle {\text{score}}(D,Q)=\sum _{i=1}^{n}{\text{IDF}}(q_{i})\cdot {\frac {f(q_{i},D)\cdot (k_{1}+1)}{f(q_{i},D)+k_{1}\cdot \left(1-b+b\cdot {\frac {|D|}{\text{avgdl}}}\right)}},}</title>
3
+ <defs aria-hidden="true">
4
+ <path stroke-width="1" id="E1-MJMAIN-73" d="M295 316Q295 356 268 385T190 414Q154 414 128 401Q98 382 98 349Q97 344 98 336T114 312T157 287Q175 282 201 278T245 269T277 256Q294 248 310 236T342 195T359 133Q359 71 321 31T198 -10H190Q138 -10 94 26L86 19L77 10Q71 4 65 -1L54 -11H46H42Q39 -11 33 -5V74V132Q33 153 35 157T45 162H54Q66 162 70 158T75 146T82 119T101 77Q136 26 198 26Q295 26 295 104Q295 133 277 151Q257 175 194 187T111 210Q75 227 54 256T33 318Q33 357 50 384T93 424T143 442T187 447H198Q238 447 268 432L283 424L292 431Q302 440 314 448H322H326Q329 448 335 442V310L329 304H301Q295 310 295 316Z"></path>
5
+ <path stroke-width="1" id="E1-MJMAIN-63" d="M370 305T349 305T313 320T297 358Q297 381 312 396Q317 401 317 402T307 404Q281 408 258 408Q209 408 178 376Q131 329 131 219Q131 137 162 90Q203 29 272 29Q313 29 338 55T374 117Q376 125 379 127T395 129H409Q415 123 415 120Q415 116 411 104T395 71T366 33T318 2T249 -11Q163 -11 99 53T34 214Q34 318 99 383T250 448T370 421T404 357Q404 334 387 320Z"></path>
6
+ <path stroke-width="1" id="E1-MJMAIN-6F" d="M28 214Q28 309 93 378T250 448Q340 448 405 380T471 215Q471 120 407 55T250 -10Q153 -10 91 57T28 214ZM250 30Q372 30 372 193V225V250Q372 272 371 288T364 326T348 362T317 390T268 410Q263 411 252 411Q222 411 195 399Q152 377 139 338T126 246V226Q126 130 145 91Q177 30 250 30Z"></path>
7
+ <path stroke-width="1" id="E1-MJMAIN-72" d="M36 46H50Q89 46 97 60V68Q97 77 97 91T98 122T98 161T98 203Q98 234 98 269T98 328L97 351Q94 370 83 376T38 385H20V408Q20 431 22 431L32 432Q42 433 60 434T96 436Q112 437 131 438T160 441T171 442H174V373Q213 441 271 441H277Q322 441 343 419T364 373Q364 352 351 337T313 322Q288 322 276 338T263 372Q263 381 265 388T270 400T273 405Q271 407 250 401Q234 393 226 386Q179 341 179 207V154Q179 141 179 127T179 101T180 81T180 66V61Q181 59 183 57T188 54T193 51T200 49T207 48T216 47T225 47T235 46T245 46H276V0H267Q249 3 140 3Q37 3 28 0H20V46H36Z"></path>
8
+ <path stroke-width="1" id="E1-MJMAIN-65" d="M28 218Q28 273 48 318T98 391T163 433T229 448Q282 448 320 430T378 380T406 316T415 245Q415 238 408 231H126V216Q126 68 226 36Q246 30 270 30Q312 30 342 62Q359 79 369 104L379 128Q382 131 395 131H398Q415 131 415 121Q415 117 412 108Q393 53 349 21T250 -11Q155 -11 92 58T28 218ZM333 275Q322 403 238 411H236Q228 411 220 410T195 402T166 381T143 340T127 274V267H333V275Z"></path>
9
+ <path stroke-width="1" id="E1-MJMAIN-28" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path>
10
+ <path stroke-width="1" id="E1-MJMATHI-44" d="M287 628Q287 635 230 637Q207 637 200 638T193 647Q193 655 197 667T204 682Q206 683 403 683Q570 682 590 682T630 676Q702 659 752 597T803 431Q803 275 696 151T444 3L430 1L236 0H125H72Q48 0 41 2T33 11Q33 13 36 25Q40 41 44 43T67 46Q94 46 127 49Q141 52 146 61Q149 65 218 339T287 628ZM703 469Q703 507 692 537T666 584T629 613T590 629T555 636Q553 636 541 636T512 636T479 637H436Q392 637 386 627Q384 623 313 339T242 52Q242 48 253 48T330 47Q335 47 349 47T373 46Q499 46 581 128Q617 164 640 212T683 339T703 469Z"></path>
11
+ <path stroke-width="1" id="E1-MJMAIN-2C" d="M78 35T78 60T94 103T137 121Q165 121 187 96T210 8Q210 -27 201 -60T180 -117T154 -158T130 -185T117 -194Q113 -194 104 -185T95 -172Q95 -168 106 -156T131 -126T157 -76T173 -3V9L172 8Q170 7 167 6T161 3T152 1T140 0Q113 0 96 17Z"></path>
12
+ <path stroke-width="1" id="E1-MJMATHI-51" d="M399 -80Q399 -47 400 -30T402 -11V-7L387 -11Q341 -22 303 -22Q208 -22 138 35T51 201Q50 209 50 244Q50 346 98 438T227 601Q351 704 476 704Q514 704 524 703Q621 689 680 617T740 435Q740 255 592 107Q529 47 461 16L444 8V3Q444 2 449 -24T470 -66T516 -82Q551 -82 583 -60T625 -3Q631 11 638 11Q647 11 649 2Q649 -6 639 -34T611 -100T557 -165T481 -194Q399 -194 399 -87V-80ZM636 468Q636 523 621 564T580 625T530 655T477 665Q429 665 379 640Q277 591 215 464T153 216Q153 110 207 59Q231 38 236 38V46Q236 86 269 120T347 155Q372 155 390 144T417 114T429 82T435 55L448 64Q512 108 557 185T619 334T636 468ZM314 18Q362 18 404 39L403 49Q399 104 366 115Q354 117 347 117Q344 117 341 117T337 118Q317 118 296 98T274 52Q274 18 314 18Z"></path>
13
+ <path stroke-width="1" id="E1-MJMAIN-29" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path>
14
+ <path stroke-width="1" id="E1-MJMAIN-3D" d="M56 347Q56 360 70 367H707Q722 359 722 347Q722 336 708 328L390 327H72Q56 332 56 347ZM56 153Q56 168 72 173H708Q722 163 722 153Q722 140 707 133H70Q56 140 56 153Z"></path>
15
+ <path stroke-width="1" id="E1-MJSZ2-2211" d="M60 948Q63 950 665 950H1267L1325 815Q1384 677 1388 669H1348L1341 683Q1320 724 1285 761Q1235 809 1174 838T1033 881T882 898T699 902H574H543H251L259 891Q722 258 724 252Q725 250 724 246Q721 243 460 -56L196 -356Q196 -357 407 -357Q459 -357 548 -357T676 -358Q812 -358 896 -353T1063 -332T1204 -283T1307 -196Q1328 -170 1348 -124H1388Q1388 -125 1381 -145T1356 -210T1325 -294L1267 -449L666 -450Q64 -450 61 -448Q55 -446 55 -439Q55 -437 57 -433L590 177Q590 178 557 222T452 366T322 544L56 909L55 924Q55 945 60 948Z"></path>
16
+ <path stroke-width="1" id="E1-MJMATHI-69" d="M184 600Q184 624 203 642T247 661Q265 661 277 649T290 619Q290 596 270 577T226 557Q211 557 198 567T184 600ZM21 287Q21 295 30 318T54 369T98 420T158 442Q197 442 223 419T250 357Q250 340 236 301T196 196T154 83Q149 61 149 51Q149 26 166 26Q175 26 185 29T208 43T235 78T260 137Q263 149 265 151T282 153Q302 153 302 143Q302 135 293 112T268 61T223 11T161 -11Q129 -11 102 10T74 74Q74 91 79 106T122 220Q160 321 166 341T173 380Q173 404 156 404H154Q124 404 99 371T61 287Q60 286 59 284T58 281T56 279T53 278T49 278T41 278H27Q21 284 21 287Z"></path>
17
+ <path stroke-width="1" id="E1-MJMAIN-31" d="M213 578L200 573Q186 568 160 563T102 556H83V602H102Q149 604 189 617T245 641T273 663Q275 666 285 666Q294 666 302 660V361L303 61Q310 54 315 52T339 48T401 46H427V0H416Q395 3 257 3Q121 3 100 0H88V46H114Q136 46 152 46T177 47T193 50T201 52T207 57T213 61V578Z"></path>
18
+ <path stroke-width="1" id="E1-MJMATHI-6E" d="M21 287Q22 293 24 303T36 341T56 388T89 425T135 442Q171 442 195 424T225 390T231 369Q231 367 232 367L243 378Q304 442 382 442Q436 442 469 415T503 336T465 179T427 52Q427 26 444 26Q450 26 453 27Q482 32 505 65T540 145Q542 153 560 153Q580 153 580 145Q580 144 576 130Q568 101 554 73T508 17T439 -10Q392 -10 371 17T350 73Q350 92 386 193T423 345Q423 404 379 404H374Q288 404 229 303L222 291L189 157Q156 26 151 16Q138 -11 108 -11Q95 -11 87 -5T76 7T74 17Q74 30 112 180T152 343Q153 348 153 366Q153 405 129 405Q91 405 66 305Q60 285 60 284Q58 278 41 278H27Q21 284 21 287Z"></path>
19
+ <path stroke-width="1" id="E1-MJMAIN-49" d="M328 0Q307 3 180 3T32 0H21V46H43Q92 46 106 49T126 60Q128 63 128 342Q128 620 126 623Q122 628 118 630T96 635T43 637H21V683H32Q53 680 180 680T328 683H339V637H317Q268 637 254 634T234 623Q232 620 232 342Q232 63 234 60Q238 55 242 53T264 48T317 46H339V0H328Z"></path>
20
+ <path stroke-width="1" id="E1-MJMAIN-44" d="M130 622Q123 629 119 631T103 634T60 637H27V683H228Q399 682 419 682T461 676Q504 667 546 641T626 573T685 470T708 336Q708 210 634 116T442 3Q429 1 228 0H27V46H60Q102 47 111 49T130 61V622ZM593 338Q593 439 571 501T493 602Q439 637 355 637H322H294Q238 637 234 628Q231 624 231 344Q231 62 232 59Q233 49 248 48T339 46H350Q456 46 515 95Q561 133 577 191T593 338Z"></path>
21
+ <path stroke-width="1" id="E1-MJMAIN-46" d="M128 619Q121 626 117 628T101 631T58 634H25V680H582V676Q584 670 596 560T610 444V440H570V444Q563 493 561 501Q555 538 543 563T516 601T477 622T431 631T374 633H334H286Q252 633 244 631T233 621Q232 619 232 490V363H284Q287 363 303 363T327 364T349 367T372 373T389 385Q407 403 410 459V480H450V200H410V221Q407 276 389 296Q381 303 371 307T348 313T327 316T303 317T284 317H232V189L233 61Q240 54 245 52T270 48T333 46H360V0H348Q324 3 182 3Q51 3 36 0H25V46H58Q100 47 109 49T128 61V619Z"></path>
22
+ <path stroke-width="1" id="E1-MJMATHI-71" d="M33 157Q33 258 109 349T280 441Q340 441 372 389Q373 390 377 395T388 406T404 418Q438 442 450 442Q454 442 457 439T460 434Q460 425 391 149Q320 -135 320 -139Q320 -147 365 -148H390Q396 -156 396 -157T393 -175Q389 -188 383 -194H370Q339 -192 262 -192Q234 -192 211 -192T174 -192T157 -193Q143 -193 143 -185Q143 -182 145 -170Q149 -154 152 -151T172 -148Q220 -148 230 -141Q238 -136 258 -53T279 32Q279 33 272 29Q224 -10 172 -10Q117 -10 75 30T33 157ZM352 326Q329 405 277 405Q242 405 210 374T160 293Q131 214 119 129Q119 126 119 118T118 106Q118 61 136 44T179 26Q233 26 290 98L298 109L352 326Z"></path>
23
+ <path stroke-width="1" id="E1-MJMAIN-22C5" d="M78 250Q78 274 95 292T138 310Q162 310 180 294T199 251Q199 226 182 208T139 190T96 207T78 250Z"></path>
24
+ <path stroke-width="1" id="E1-MJMATHI-66" d="M118 -162Q120 -162 124 -164T135 -167T147 -168Q160 -168 171 -155T187 -126Q197 -99 221 27T267 267T289 382V385H242Q195 385 192 387Q188 390 188 397L195 425Q197 430 203 430T250 431Q298 431 298 432Q298 434 307 482T319 540Q356 705 465 705Q502 703 526 683T550 630Q550 594 529 578T487 561Q443 561 443 603Q443 622 454 636T478 657L487 662Q471 668 457 668Q445 668 434 658T419 630Q412 601 403 552T387 469T380 433Q380 431 435 431Q480 431 487 430T498 424Q499 420 496 407T491 391Q489 386 482 386T428 385H372L349 263Q301 15 282 -47Q255 -132 212 -173Q175 -205 139 -205Q107 -205 81 -186T55 -132Q55 -95 76 -78T118 -61Q162 -61 162 -103Q162 -122 151 -136T127 -157L118 -162Z"></path>
25
+ <path stroke-width="1" id="E1-MJMATHI-6B" d="M121 647Q121 657 125 670T137 683Q138 683 209 688T282 694Q294 694 294 686Q294 679 244 477Q194 279 194 272Q213 282 223 291Q247 309 292 354T362 415Q402 442 438 442Q468 442 485 423T503 369Q503 344 496 327T477 302T456 291T438 288Q418 288 406 299T394 328Q394 353 410 369T442 390L458 393Q446 405 434 405H430Q398 402 367 380T294 316T228 255Q230 254 243 252T267 246T293 238T320 224T342 206T359 180T365 147Q365 130 360 106T354 66Q354 26 381 26Q429 26 459 145Q461 153 479 153H483Q499 153 499 144Q499 139 496 130Q455 -11 378 -11Q333 -11 305 15T277 90Q277 108 280 121T283 145Q283 167 269 183T234 206T200 217T182 220H180Q168 178 159 139T145 81T136 44T129 20T122 7T111 -2Q98 -11 83 -11Q66 -11 57 -1T48 16Q48 26 85 176T158 471L195 616Q196 629 188 632T149 637H144Q134 637 131 637T124 640T121 647Z"></path>
26
+ <path stroke-width="1" id="E1-MJMAIN-2B" d="M56 237T56 250T70 270H369V420L370 570Q380 583 389 583Q402 583 409 568V270H707Q722 262 722 250T707 230H409V-68Q401 -82 391 -82H389H387Q375 -82 369 -68V230H70Q56 237 56 250Z"></path>
27
+ <path stroke-width="1" id="E1-MJMAIN-2212" d="M84 237T84 250T98 270H679Q694 262 694 250T679 230H98Q84 237 84 250Z"></path>
28
+ <path stroke-width="1" id="E1-MJMATHI-62" d="M73 647Q73 657 77 670T89 683Q90 683 161 688T234 694Q246 694 246 685T212 542Q204 508 195 472T180 418L176 399Q176 396 182 402Q231 442 283 442Q345 442 383 396T422 280Q422 169 343 79T173 -11Q123 -11 82 27T40 150V159Q40 180 48 217T97 414Q147 611 147 623T109 637Q104 637 101 637H96Q86 637 83 637T76 640T73 647ZM336 325V331Q336 405 275 405Q258 405 240 397T207 376T181 352T163 330L157 322L136 236Q114 150 114 114Q114 66 138 42Q154 26 178 26Q211 26 245 58Q270 81 285 114T318 219Q336 291 336 325Z"></path>
29
+ <path stroke-width="1" id="E1-MJMAIN-7C" d="M139 -249H137Q125 -249 119 -235V251L120 737Q130 750 139 750Q152 750 159 735V-235Q151 -249 141 -249H139Z"></path>
30
+ <path stroke-width="1" id="E1-MJMAIN-61" d="M137 305T115 305T78 320T63 359Q63 394 97 421T218 448Q291 448 336 416T396 340Q401 326 401 309T402 194V124Q402 76 407 58T428 40Q443 40 448 56T453 109V145H493V106Q492 66 490 59Q481 29 455 12T400 -6T353 12T329 54V58L327 55Q325 52 322 49T314 40T302 29T287 17T269 6T247 -2T221 -8T190 -11Q130 -11 82 20T34 107Q34 128 41 147T68 188T116 225T194 253T304 268H318V290Q318 324 312 340Q290 411 215 411Q197 411 181 410T156 406T148 403Q170 388 170 359Q170 334 154 320ZM126 106Q126 75 150 51T209 26Q247 26 276 49T315 109Q317 116 318 175Q318 233 317 233Q309 233 296 232T251 223T193 203T147 166T126 106Z"></path>
31
+ <path stroke-width="1" id="E1-MJMAIN-76" d="M338 431Q344 429 422 429Q479 429 503 431H508V385H497Q439 381 423 345Q421 341 356 172T288 -2Q283 -11 263 -11Q244 -11 239 -2Q99 359 98 364Q93 378 82 381T43 385H19V431H25L33 430Q41 430 53 430T79 430T104 429T122 428Q217 428 232 431H240V385H226Q187 384 184 370Q184 366 235 234L286 102L377 341V349Q377 363 367 372T349 383T335 385H331V431H338Z"></path>
32
+ <path stroke-width="1" id="E1-MJMAIN-67" d="M329 409Q373 453 429 453Q459 453 472 434T485 396Q485 382 476 371T449 360Q416 360 412 390Q410 404 415 411Q415 412 416 414V415Q388 412 363 393Q355 388 355 386Q355 385 359 381T368 369T379 351T388 325T392 292Q392 230 343 187T222 143Q172 143 123 171Q112 153 112 133Q112 98 138 81Q147 75 155 75T227 73Q311 72 335 67Q396 58 431 26Q470 -13 470 -72Q470 -139 392 -175Q332 -206 250 -206Q167 -206 107 -175Q29 -140 29 -75Q29 -39 50 -15T92 18L103 24Q67 55 67 108Q67 155 96 193Q52 237 52 292Q52 355 102 398T223 442Q274 442 318 416L329 409ZM299 343Q294 371 273 387T221 404Q192 404 171 388T145 343Q142 326 142 292Q142 248 149 227T179 192Q196 182 222 182Q244 182 260 189T283 207T294 227T299 242Q302 258 302 292T299 343ZM403 -75Q403 -50 389 -34T348 -11T299 -2T245 0H218Q151 0 138 -6Q118 -15 107 -34T95 -74Q95 -84 101 -97T122 -127T170 -155T250 -167Q319 -167 361 -139T403 -75Z"></path>
33
+ <path stroke-width="1" id="E1-MJMAIN-64" d="M376 495Q376 511 376 535T377 568Q377 613 367 624T316 637H298V660Q298 683 300 683L310 684Q320 685 339 686T376 688Q393 689 413 690T443 693T454 694H457V390Q457 84 458 81Q461 61 472 55T517 46H535V0Q533 0 459 -5T380 -11H373V44L365 37Q307 -11 235 -11Q158 -11 96 50T34 215Q34 315 97 378T244 442Q319 442 376 393V495ZM373 342Q328 405 260 405Q211 405 173 369Q146 341 139 305T131 211Q131 155 138 120T173 59Q203 26 251 26Q322 26 373 103V342Z"></path>
34
+ <path stroke-width="1" id="E1-MJMAIN-6C" d="M42 46H56Q95 46 103 60V68Q103 77 103 91T103 124T104 167T104 217T104 272T104 329Q104 366 104 407T104 482T104 542T103 586T103 603Q100 622 89 628T44 637H26V660Q26 683 28 683L38 684Q48 685 67 686T104 688Q121 689 141 690T171 693T182 694H185V379Q185 62 186 60Q190 52 198 49Q219 46 247 46H263V0H255L232 1Q209 2 183 2T145 3T107 3T57 1L34 0H26V46H42Z"></path>
35
+ <path stroke-width="1" id="E1-MJSZ2-28" d="M180 96T180 250T205 541T266 770T353 944T444 1069T527 1150H555Q561 1144 561 1141Q561 1137 545 1120T504 1072T447 995T386 878T330 721T288 513T272 251Q272 133 280 56Q293 -87 326 -209T399 -405T475 -531T536 -609T561 -640Q561 -643 555 -649H527Q483 -612 443 -568T353 -443T266 -270T205 -41Z"></path>
36
+ <path stroke-width="1" id="E1-MJSZ2-29" d="M35 1138Q35 1150 51 1150H56H69Q113 1113 153 1069T243 944T330 771T391 541T416 250T391 -40T330 -270T243 -443T152 -568T69 -649H56Q43 -649 39 -647T35 -637Q65 -607 110 -548Q283 -316 316 56Q324 133 324 251Q324 368 316 445Q278 877 48 1123Q36 1137 35 1138Z"></path>
37
+ </defs>
38
+ <g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)" aria-hidden="true">
39
+ <use xlink:href="#E1-MJMAIN-73"></use>
40
+ <use xlink:href="#E1-MJMAIN-63" x="394" y="0"></use>
41
+ <use xlink:href="#E1-MJMAIN-6F" x="839" y="0"></use>
42
+ <use xlink:href="#E1-MJMAIN-72" x="1339" y="0"></use>
43
+ <use xlink:href="#E1-MJMAIN-65" x="1732" y="0"></use>
44
+ <use xlink:href="#E1-MJMAIN-28" x="2176" y="0"></use>
45
+ <use xlink:href="#E1-MJMATHI-44" x="2566" y="0"></use>
46
+ <use xlink:href="#E1-MJMAIN-2C" x="3394" y="0"></use>
47
+ <use xlink:href="#E1-MJMATHI-51" x="3839" y="0"></use>
48
+ <use xlink:href="#E1-MJMAIN-29" x="4631" y="0"></use>
49
+ <use xlink:href="#E1-MJMAIN-3D" x="5298" y="0"></use>
50
+ <g transform="translate(6354,0)">
51
+ <use xlink:href="#E1-MJSZ2-2211" x="0" y="0"></use>
52
+ <g transform="translate(147,-1090)">
53
+ <use transform="scale(0.707)" xlink:href="#E1-MJMATHI-69" x="0" y="0"></use>
54
+ <use transform="scale(0.707)" xlink:href="#E1-MJMAIN-3D" x="345" y="0"></use>
55
+ <use transform="scale(0.707)" xlink:href="#E1-MJMAIN-31" x="1124" y="0"></use>
56
+ </g>
57
+ <use transform="scale(0.707)" xlink:href="#E1-MJMATHI-6E" x="721" y="1627"></use>
58
+ </g>
59
+ <g transform="translate(7965,0)">
60
+ <use xlink:href="#E1-MJMAIN-49"></use>
61
+ <use xlink:href="#E1-MJMAIN-44" x="361" y="0"></use>
62
+ <use xlink:href="#E1-MJMAIN-46" x="1126" y="0"></use>
63
+ </g>
64
+ <use xlink:href="#E1-MJMAIN-28" x="9745" y="0"></use>
65
+ <g transform="translate(10134,0)">
66
+ <use xlink:href="#E1-MJMATHI-71" x="0" y="0"></use>
67
+ <use transform="scale(0.707)" xlink:href="#E1-MJMATHI-69" x="631" y="-213"></use>
68
+ </g>
69
+ <use xlink:href="#E1-MJMAIN-29" x="10925" y="0"></use>
70
+ <use xlink:href="#E1-MJMAIN-22C5" x="11537" y="0"></use>
71
+ <g transform="translate(12038,0)">
72
+ <g transform="translate(120,0)">
73
+ <rect stroke="none" width="14190" height="60" x="0" y="220"></rect>
74
+ <g transform="translate(3297,770)">
75
+ <use xlink:href="#E1-MJMATHI-66" x="0" y="0"></use>
76
+ <use xlink:href="#E1-MJMAIN-28" x="550" y="0"></use>
77
+ <g transform="translate(940,0)">
78
+ <use xlink:href="#E1-MJMATHI-71" x="0" y="0"></use>
79
+ <use transform="scale(0.707)" xlink:href="#E1-MJMATHI-69" x="631" y="-213"></use>
80
+ </g>
81
+ <use xlink:href="#E1-MJMAIN-2C" x="1730" y="0"></use>
82
+ <use xlink:href="#E1-MJMATHI-44" x="2175" y="0"></use>
83
+ <use xlink:href="#E1-MJMAIN-29" x="3004" y="0"></use>
84
+ <use xlink:href="#E1-MJMAIN-22C5" x="3616" y="0"></use>
85
+ <use xlink:href="#E1-MJMAIN-28" x="4116" y="0"></use>
86
+ <g transform="translate(4506,0)">
87
+ <use xlink:href="#E1-MJMATHI-6B" x="0" y="0"></use>
88
+ <use transform="scale(0.707)" xlink:href="#E1-MJMAIN-31" x="737" y="-213"></use>
89
+ </g>
90
+ <use xlink:href="#E1-MJMAIN-2B" x="5704" y="0"></use>
91
+ <use xlink:href="#E1-MJMAIN-31" x="6704" y="0"></use>
92
+ <use xlink:href="#E1-MJMAIN-29" x="7205" y="0"></use>
93
+ </g>
94
+ <g transform="translate(60,-1172)">
95
+ <use xlink:href="#E1-MJMATHI-66" x="0" y="0"></use>
96
+ <use xlink:href="#E1-MJMAIN-28" x="550" y="0"></use>
97
+ <g transform="translate(940,0)">
98
+ <use xlink:href="#E1-MJMATHI-71" x="0" y="0"></use>
99
+ <use transform="scale(0.707)" xlink:href="#E1-MJMATHI-69" x="631" y="-213"></use>
100
+ </g>
101
+ <use xlink:href="#E1-MJMAIN-2C" x="1730" y="0"></use>
102
+ <use xlink:href="#E1-MJMATHI-44" x="2175" y="0"></use>
103
+ <use xlink:href="#E1-MJMAIN-29" x="3004" y="0"></use>
104
+ <use xlink:href="#E1-MJMAIN-2B" x="3616" y="0"></use>
105
+ <g transform="translate(4616,0)">
106
+ <use xlink:href="#E1-MJMATHI-6B" x="0" y="0"></use>
107
+ <use transform="scale(0.707)" xlink:href="#E1-MJMAIN-31" x="737" y="-213"></use>
108
+ </g>
109
+ <use xlink:href="#E1-MJMAIN-22C5" x="5814" y="0"></use>
110
+ <g transform="translate(6315,0)">
111
+ <use xlink:href="#E1-MJSZ2-28"></use>
112
+ <g transform="translate(597,0)">
113
+ <use xlink:href="#E1-MJMAIN-31" x="0" y="0"></use>
114
+ <use xlink:href="#E1-MJMAIN-2212" x="722" y="0"></use>
115
+ <use xlink:href="#E1-MJMATHI-62" x="1723" y="0"></use>
116
+ <use xlink:href="#E1-MJMAIN-2B" x="2375" y="0"></use>
117
+ <use xlink:href="#E1-MJMATHI-62" x="3375" y="0"></use>
118
+ <use xlink:href="#E1-MJMAIN-22C5" x="4027" y="0"></use>
119
+ <g transform="translate(4528,0)">
120
+ <g transform="translate(120,0)">
121
+ <rect stroke="none" width="1791" height="60" x="0" y="220"></rect>
122
+ <g transform="translate(406,621)">
123
+ <use transform="scale(0.707)" xlink:href="#E1-MJMAIN-7C" x="0" y="0"></use>
124
+ <use transform="scale(0.707)" xlink:href="#E1-MJMATHI-44" x="278" y="0"></use>
125
+ <use transform="scale(0.707)" xlink:href="#E1-MJMAIN-7C" x="1107" y="0"></use>
126
+ </g>
127
+ <g transform="translate(60,-437)">
128
+ <use transform="scale(0.707)" xlink:href="#E1-MJMAIN-61"></use>
129
+ <use transform="scale(0.707)" xlink:href="#E1-MJMAIN-76" x="500" y="0"></use>
130
+ <use transform="scale(0.707)" xlink:href="#E1-MJMAIN-67" x="1029" y="0"></use>
131
+ <use transform="scale(0.707)" xlink:href="#E1-MJMAIN-64" x="1529" y="0"></use>
132
+ <use transform="scale(0.707)" xlink:href="#E1-MJMAIN-6C" x="2086" y="0"></use>
133
+ </g>
134
+ </g>
135
+ </g>
136
+ </g>
137
+ <use xlink:href="#E1-MJSZ2-29" x="7157" y="-1"></use>
138
+ </g>
139
+ </g>
140
+ </g>
141
+ </g>
142
+ <use xlink:href="#E1-MJMAIN-2C" x="26468" y="0"></use>
143
+ </g>
144
+ </svg>
data/formula2.svg ADDED
@@ -0,0 +1,79 @@
1
+ <svg xmlns:xlink="http://www.w3.org/1999/xlink" width="31.476ex" height="6.509ex" style="vertical-align: -2.671ex;" viewBox="0 -1652.5 13552.2 2802.6" role="img" focusable="false" xmlns="http://www.w3.org/2000/svg" aria-labelledby="MathJax-SVG-1-Title">
2
+ <title id="MathJax-SVG-1-Title">{\displaystyle {\text{IDF}}(q_{i})=\log {\frac {N-n(q_{i})+0.5}{n(q_{i})+0.5}},}</title>
3
+ <defs aria-hidden="true">
4
+ <path stroke-width="1" id="E1-MJMAIN-49" d="M328 0Q307 3 180 3T32 0H21V46H43Q92 46 106 49T126 60Q128 63 128 342Q128 620 126 623Q122 628 118 630T96 635T43 637H21V683H32Q53 680 180 680T328 683H339V637H317Q268 637 254 634T234 623Q232 620 232 342Q232 63 234 60Q238 55 242 53T264 48T317 46H339V0H328Z"></path>
5
+ <path stroke-width="1" id="E1-MJMAIN-44" d="M130 622Q123 629 119 631T103 634T60 637H27V683H228Q399 682 419 682T461 676Q504 667 546 641T626 573T685 470T708 336Q708 210 634 116T442 3Q429 1 228 0H27V46H60Q102 47 111 49T130 61V622ZM593 338Q593 439 571 501T493 602Q439 637 355 637H322H294Q238 637 234 628Q231 624 231 344Q231 62 232 59Q233 49 248 48T339 46H350Q456 46 515 95Q561 133 577 191T593 338Z"></path>
6
+ <path stroke-width="1" id="E1-MJMAIN-46" d="M128 619Q121 626 117 628T101 631T58 634H25V680H582V676Q584 670 596 560T610 444V440H570V444Q563 493 561 501Q555 538 543 563T516 601T477 622T431 631T374 633H334H286Q252 633 244 631T233 621Q232 619 232 490V363H284Q287 363 303 363T327 364T349 367T372 373T389 385Q407 403 410 459V480H450V200H410V221Q407 276 389 296Q381 303 371 307T348 313T327 316T303 317T284 317H232V189L233 61Q240 54 245 52T270 48T333 46H360V0H348Q324 3 182 3Q51 3 36 0H25V46H58Q100 47 109 49T128 61V619Z"></path>
7
+ <path stroke-width="1" id="E1-MJMAIN-28" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path>
8
+ <path stroke-width="1" id="E1-MJMATHI-71" d="M33 157Q33 258 109 349T280 441Q340 441 372 389Q373 390 377 395T388 406T404 418Q438 442 450 442Q454 442 457 439T460 434Q460 425 391 149Q320 -135 320 -139Q320 -147 365 -148H390Q396 -156 396 -157T393 -175Q389 -188 383 -194H370Q339 -192 262 -192Q234 -192 211 -192T174 -192T157 -193Q143 -193 143 -185Q143 -182 145 -170Q149 -154 152 -151T172 -148Q220 -148 230 -141Q238 -136 258 -53T279 32Q279 33 272 29Q224 -10 172 -10Q117 -10 75 30T33 157ZM352 326Q329 405 277 405Q242 405 210 374T160 293Q131 214 119 129Q119 126 119 118T118 106Q118 61 136 44T179 26Q233 26 290 98L298 109L352 326Z"></path>
9
+ <path stroke-width="1" id="E1-MJMATHI-69" d="M184 600Q184 624 203 642T247 661Q265 661 277 649T290 619Q290 596 270 577T226 557Q211 557 198 567T184 600ZM21 287Q21 295 30 318T54 369T98 420T158 442Q197 442 223 419T250 357Q250 340 236 301T196 196T154 83Q149 61 149 51Q149 26 166 26Q175 26 185 29T208 43T235 78T260 137Q263 149 265 151T282 153Q302 153 302 143Q302 135 293 112T268 61T223 11T161 -11Q129 -11 102 10T74 74Q74 91 79 106T122 220Q160 321 166 341T173 380Q173 404 156 404H154Q124 404 99 371T61 287Q60 286 59 284T58 281T56 279T53 278T49 278T41 278H27Q21 284 21 287Z"></path>
10
+ <path stroke-width="1" id="E1-MJMAIN-29" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path>
11
+ <path stroke-width="1" id="E1-MJMAIN-3D" d="M56 347Q56 360 70 367H707Q722 359 722 347Q722 336 708 328L390 327H72Q56 332 56 347ZM56 153Q56 168 72 173H708Q722 163 722 153Q722 140 707 133H70Q56 140 56 153Z"></path>
12
+ <path stroke-width="1" id="E1-MJMAIN-6C" d="M42 46H56Q95 46 103 60V68Q103 77 103 91T103 124T104 167T104 217T104 272T104 329Q104 366 104 407T104 482T104 542T103 586T103 603Q100 622 89 628T44 637H26V660Q26 683 28 683L38 684Q48 685 67 686T104 688Q121 689 141 690T171 693T182 694H185V379Q185 62 186 60Q190 52 198 49Q219 46 247 46H263V0H255L232 1Q209 2 183 2T145 3T107 3T57 1L34 0H26V46H42Z"></path>
13
+ <path stroke-width="1" id="E1-MJMAIN-6F" d="M28 214Q28 309 93 378T250 448Q340 448 405 380T471 215Q471 120 407 55T250 -10Q153 -10 91 57T28 214ZM250 30Q372 30 372 193V225V250Q372 272 371 288T364 326T348 362T317 390T268 410Q263 411 252 411Q222 411 195 399Q152 377 139 338T126 246V226Q126 130 145 91Q177 30 250 30Z"></path>
14
+ <path stroke-width="1" id="E1-MJMAIN-67" d="M329 409Q373 453 429 453Q459 453 472 434T485 396Q485 382 476 371T449 360Q416 360 412 390Q410 404 415 411Q415 412 416 414V415Q388 412 363 393Q355 388 355 386Q355 385 359 381T368 369T379 351T388 325T392 292Q392 230 343 187T222 143Q172 143 123 171Q112 153 112 133Q112 98 138 81Q147 75 155 75T227 73Q311 72 335 67Q396 58 431 26Q470 -13 470 -72Q470 -139 392 -175Q332 -206 250 -206Q167 -206 107 -175Q29 -140 29 -75Q29 -39 50 -15T92 18L103 24Q67 55 67 108Q67 155 96 193Q52 237 52 292Q52 355 102 398T223 442Q274 442 318 416L329 409ZM299 343Q294 371 273 387T221 404Q192 404 171 388T145 343Q142 326 142 292Q142 248 149 227T179 192Q196 182 222 182Q244 182 260 189T283 207T294 227T299 242Q302 258 302 292T299 343ZM403 -75Q403 -50 389 -34T348 -11T299 -2T245 0H218Q151 0 138 -6Q118 -15 107 -34T95 -74Q95 -84 101 -97T122 -127T170 -155T250 -167Q319 -167 361 -139T403 -75Z"></path>
15
+ <path stroke-width="1" id="E1-MJMATHI-4E" d="M234 637Q231 637 226 637Q201 637 196 638T191 649Q191 676 202 682Q204 683 299 683Q376 683 387 683T401 677Q612 181 616 168L670 381Q723 592 723 606Q723 633 659 637Q635 637 635 648Q635 650 637 660Q641 676 643 679T653 683Q656 683 684 682T767 680Q817 680 843 681T873 682Q888 682 888 672Q888 650 880 642Q878 637 858 637Q787 633 769 597L620 7Q618 0 599 0Q585 0 582 2Q579 5 453 305L326 604L261 344Q196 88 196 79Q201 46 268 46H278Q284 41 284 38T282 19Q278 6 272 0H259Q228 2 151 2Q123 2 100 2T63 2T46 1Q31 1 31 10Q31 14 34 26T39 40Q41 46 62 46Q130 49 150 85Q154 91 221 362L289 634Q287 635 234 637Z"></path>
16
+ <path stroke-width="1" id="E1-MJMAIN-2212" d="M84 237T84 250T98 270H679Q694 262 694 250T679 230H98Q84 237 84 250Z"></path>
17
+ <path stroke-width="1" id="E1-MJMATHI-6E" d="M21 287Q22 293 24 303T36 341T56 388T89 425T135 442Q171 442 195 424T225 390T231 369Q231 367 232 367L243 378Q304 442 382 442Q436 442 469 415T503 336T465 179T427 52Q427 26 444 26Q450 26 453 27Q482 32 505 65T540 145Q542 153 560 153Q580 153 580 145Q580 144 576 130Q568 101 554 73T508 17T439 -10Q392 -10 371 17T350 73Q350 92 386 193T423 345Q423 404 379 404H374Q288 404 229 303L222 291L189 157Q156 26 151 16Q138 -11 108 -11Q95 -11 87 -5T76 7T74 17Q74 30 112 180T152 343Q153 348 153 366Q153 405 129 405Q91 405 66 305Q60 285 60 284Q58 278 41 278H27Q21 284 21 287Z"></path>
18
+ <path stroke-width="1" id="E1-MJMAIN-2B" d="M56 237T56 250T70 270H369V420L370 570Q380 583 389 583Q402 583 409 568V270H707Q722 262 722 250T707 230H409V-68Q401 -82 391 -82H389H387Q375 -82 369 -68V230H70Q56 237 56 250Z"></path>
19
+ <path stroke-width="1" id="E1-MJMAIN-30" d="M96 585Q152 666 249 666Q297 666 345 640T423 548Q460 465 460 320Q460 165 417 83Q397 41 362 16T301 -15T250 -22Q224 -22 198 -16T137 16T82 83Q39 165 39 320Q39 494 96 585ZM321 597Q291 629 250 629Q208 629 178 597Q153 571 145 525T137 333Q137 175 145 125T181 46Q209 16 250 16Q290 16 318 46Q347 76 354 130T362 333Q362 478 354 524T321 597Z"></path>
20
+ <path stroke-width="1" id="E1-MJMAIN-2E" d="M78 60Q78 84 95 102T138 120Q162 120 180 104T199 61Q199 36 182 18T139 0T96 17T78 60Z"></path>
21
+ <path stroke-width="1" id="E1-MJMAIN-35" d="M164 157Q164 133 148 117T109 101H102Q148 22 224 22Q294 22 326 82Q345 115 345 210Q345 313 318 349Q292 382 260 382H254Q176 382 136 314Q132 307 129 306T114 304Q97 304 95 310Q93 314 93 485V614Q93 664 98 664Q100 666 102 666Q103 666 123 658T178 642T253 634Q324 634 389 662Q397 666 402 666Q410 666 410 648V635Q328 538 205 538Q174 538 149 544L139 546V374Q158 388 169 396T205 412T256 420Q337 420 393 355T449 201Q449 109 385 44T229 -22Q148 -22 99 32T50 154Q50 178 61 192T84 210T107 214Q132 214 148 197T164 157Z"></path>
22
+ <path stroke-width="1" id="E1-MJMAIN-2C" d="M78 35T78 60T94 103T137 121Q165 121 187 96T210 8Q210 -27 201 -60T180 -117T154 -158T130 -185T117 -194Q113 -194 104 -185T95 -172Q95 -168 106 -156T131 -126T157 -76T173 -3V9L172 8Q170 7 167 6T161 3T152 1T140 0Q113 0 96 17Z"></path>
23
+ </defs>
24
+ <g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)" aria-hidden="true">
25
+ <use xlink:href="#E1-MJMAIN-49"></use>
26
+ <use xlink:href="#E1-MJMAIN-44" x="361" y="0"></use>
27
+ <use xlink:href="#E1-MJMAIN-46" x="1126" y="0"></use>
28
+ <use xlink:href="#E1-MJMAIN-28" x="1779" y="0"></use>
29
+ <g transform="translate(2169,0)">
30
+ <use xlink:href="#E1-MJMATHI-71" x="0" y="0"></use>
31
+ <use transform="scale(0.707)" xlink:href="#E1-MJMATHI-69" x="631" y="-213"></use>
32
+ </g>
33
+ <use xlink:href="#E1-MJMAIN-29" x="2959" y="0"></use>
34
+ <use xlink:href="#E1-MJMAIN-3D" x="3627" y="0"></use>
35
+ <g transform="translate(4683,0)">
36
+ <use xlink:href="#E1-MJMAIN-6C"></use>
37
+ <use xlink:href="#E1-MJMAIN-6F" x="278" y="0"></use>
38
+ <use xlink:href="#E1-MJMAIN-67" x="779" y="0"></use>
39
+ </g>
40
+ <g transform="translate(6129,0)">
41
+ <g transform="translate(120,0)">
42
+ <rect stroke="none" width="6904" height="60" x="0" y="220"></rect>
43
+ <g transform="translate(60,770)">
44
+ <use xlink:href="#E1-MJMATHI-4E" x="0" y="0"></use>
45
+ <use xlink:href="#E1-MJMAIN-2212" x="1110" y="0"></use>
46
+ <use xlink:href="#E1-MJMATHI-6E" x="2111" y="0"></use>
47
+ <use xlink:href="#E1-MJMAIN-28" x="2711" y="0"></use>
48
+ <g transform="translate(3101,0)">
49
+ <use xlink:href="#E1-MJMATHI-71" x="0" y="0"></use>
50
+ <use transform="scale(0.707)" xlink:href="#E1-MJMATHI-69" x="631" y="-213"></use>
51
+ </g>
52
+ <use xlink:href="#E1-MJMAIN-29" x="3892" y="0"></use>
53
+ <use xlink:href="#E1-MJMAIN-2B" x="4503" y="0"></use>
54
+ <g transform="translate(5504,0)">
55
+ <use xlink:href="#E1-MJMAIN-30"></use>
56
+ <use xlink:href="#E1-MJMAIN-2E" x="500" y="0"></use>
57
+ <use xlink:href="#E1-MJMAIN-35" x="779" y="0"></use>
58
+ </g>
59
+ </g>
60
+ <g transform="translate(1115,-771)">
61
+ <use xlink:href="#E1-MJMATHI-6E" x="0" y="0"></use>
62
+ <use xlink:href="#E1-MJMAIN-28" x="600" y="0"></use>
63
+ <g transform="translate(990,0)">
64
+ <use xlink:href="#E1-MJMATHI-71" x="0" y="0"></use>
65
+ <use transform="scale(0.707)" xlink:href="#E1-MJMATHI-69" x="631" y="-213"></use>
66
+ </g>
67
+ <use xlink:href="#E1-MJMAIN-29" x="1780" y="0"></use>
68
+ <use xlink:href="#E1-MJMAIN-2B" x="2392" y="0"></use>
69
+ <g transform="translate(3393,0)">
70
+ <use xlink:href="#E1-MJMAIN-30"></use>
71
+ <use xlink:href="#E1-MJMAIN-2E" x="500" y="0"></use>
72
+ <use xlink:href="#E1-MJMAIN-35" x="779" y="0"></use>
73
+ </g>
74
+ </g>
75
+ </g>
76
+ </g>
77
+ <use xlink:href="#E1-MJMAIN-2C" x="13273" y="0"></use>
78
+ </g>
79
+ </svg>
@@ -0,0 +1,75 @@
1
+ require 'mini_search/version.rb'
2
+ require 'mini_search/stemmer/portuguese.rb'
3
+ require 'mini_search/standard_whitespace_tokenizer.rb'
4
+ require 'mini_search/strip_filter.rb'
5
+ require 'mini_search/remove_punctuation_filter.rb'
6
+ require 'mini_search/downcase_filter.rb'
7
+ require 'mini_search/stop_words_filter.rb'
8
+ require 'mini_search/synonyms_filter.rb'
9
+ require 'mini_search/stemmer_filter.rb'
10
+ require 'mini_search/language_support/portuguese.rb'
11
+ require 'mini_search/pipeline.rb'
12
+ require 'mini_search/inverted_index.rb'
13
+ require 'mini_search/tf.rb'
14
+ require 'mini_search/idf.rb'
15
+ require 'mini_search/bm_25.rb'
16
+
17
# Entry points for building in-memory search indexes.
module MiniSearch
  # Language supports that can be selected by symbol in
  # MiniSearch.new_localized_index.
  LANGUAGE_SUPPORTS = {
    pt: MiniSearch::LanguageSupport::Portuguese
  }

  # Builds an InvertedIndex from two already configured pipelines.
  #
  # @param indexing_pipeline pipeline applied to documents being indexed
  # @param querying_pipeline pipeline applied to search queries
  # @return [MiniSearch::InvertedIndex]
  def self.new(indexing_pipeline, querying_pipeline)
    MiniSearch::InvertedIndex.new(indexing_pipeline, querying_pipeline)
  end

  # Builds an index using the standard whitespace tokenizer and the default
  # filter chain (strip, remove punctuation, downcase, stop words, stemmer).
  # The querying pipeline additionally applies the synonyms filter as its
  # last step; synonyms are not applied while indexing.
  #
  # @param stop_words passed to StopWordsFilter
  # @param synonyms_map passed to SynonymsFilter (query side only)
  # @param stemmer passed to StemmerFilter
  # @return [MiniSearch::InvertedIndex]
  def self.new_index(stop_words: [], synonyms_map: {}, stemmer: nil)
    standard_whitespace_tokenizer = StandardWhitespaceTokenizer.new

    # The same filter instances are shared by both pipelines.
    strip_filter = StripFilter.new
    remove_punctuation_filter = RemovePunctuationFilter.new
    downcase_filter = DowncaseFilter.new
    stop_words_filter = StopWordsFilter.new(stop_words)
    stemmer_filter = StemmerFilter.new(stemmer)
    synonyms_filter = SynonymsFilter.new(synonyms_map)

    indexing_pipeline = Pipeline.new(
      standard_whitespace_tokenizer,
      [
        strip_filter,
        remove_punctuation_filter,
        downcase_filter,
        stop_words_filter,
        stemmer_filter
      ]
    )

    querying_pipeline = Pipeline.new(
      standard_whitespace_tokenizer,
      [
        strip_filter,
        remove_punctuation_filter,
        downcase_filter,
        stop_words_filter,
        stemmer_filter,
        synonyms_filter
      ]
    )

    new(indexing_pipeline, querying_pipeline)
  end

  # Builds an index configured from a language support object.
  #
  # @param language_support either a Symbol key of LANGUAGE_SUPPORTS, or an
  #   object responding to #stop_words and #stemmer
  # @param synonyms_map forwarded to new_index
  # @param stop_words passed to the language support constructor; only used
  #   when language_support is given as a Symbol
  # @raise [RuntimeError] when the symbol is not registered or the given
  #   language support is nil/false
  # @return [MiniSearch::InvertedIndex]
  def self.new_localized_index(language_support, synonyms_map: {}, stop_words: [])
    if language_support.is_a?(Symbol)
      # Safe navigation keeps an unknown symbol as nil so that the descriptive
      # error below is raised instead of a NoMethodError on nil.
      language_support = LANGUAGE_SUPPORTS[language_support]&.new(stop_words)
    end

    raise 'language support not found or nil' unless language_support

    new_index(
      stop_words: language_support.stop_words,
      stemmer: language_support.stemmer,
      synonyms_map: synonyms_map
    )
  end
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
module MiniSearch
  # Okapi BM25 term score.
  # https://en.wikipedia.org/wiki/Okapi_BM25
  class Bm25
    # Scores a single term for a single document:
    #
    #   idf * (tf * (k1 + 1)) / (tf + k1 * (1 - b + b * |D| / avgdl))
    #
    # @param tf term frequency value of the term in the document
    # @param idf inverse document frequency of the term
    # @param k1 BM25 k1 parameter
    # @param b BM25 b parameter
    # @param document_length length of the document (converted with to_f)
    # @param document_length_average average document length of the collection
    # @return the BM25 score contribution of the term
    def self.calculate(tf:, idf:, k1: 1.2, b: 0.75, document_length:, document_length_average:)
      length_ratio = document_length.to_f / document_length_average

      numerator = tf * (k1 + 1)
      denominator = tf + k1 * (1 - b + b * length_ratio)

      # The two parts must be divided. A lone line-continuation backslash
      # between them makes Ruby evaluate them as separate statements and
      # return only the denominator.
      idf * (numerator / denominator)
    end
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
module MiniSearch
  # Pipeline filter that lower-cases every token it receives.
  class DowncaseFilter
    # @param tokens collection of tokens responding to #downcase
    # @return [Array] a new array holding the downcased tokens
    def execute(tokens)
      tokens.map { |token| token.downcase }
    end
  end
end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
module MiniSearch
  # Inverse document frequency in the form used by BM25.
  class Idf
    # Computes ln((N - n + 0.5) / (n + 0.5)), where N is the number of
    # documents and n the number of documents containing the term.
    # The result is negative when the term occurs in more than half of the
    # documents.
    #
    # @param number_of_documents_with_term documents containing the term (n)
    # @param number_of_documents total number of documents (N)
    # @return [Float]
    def self.calculate(number_of_documents_with_term, number_of_documents)
      containing = number_of_documents_with_term.to_f
      total = number_of_documents.to_f

      Math.log((total - containing + 0.5) / (containing + 0.5))
    end
  end
end
@@ -0,0 +1,163 @@
1
+ # frozen_string_literal: true
2
+
3
module MiniSearch
  # Very simple and naive in-memory search engine
  # implementing an inverted index.
  #
  # Internal state:
  #   @documents      { id => { document:, number_of_terms: } }
  #   @inverted_index { term => [[document, { term:, value: tf }], ...] }
  class InvertedIndex
    attr_accessor :inverted_index, :documents, :document_length_average, :indexing_pipeline, :querying_pipeline

    # @param indexing_pipeline object whose #execute turns a document's
    #   indexed field into a list of terms
    # @param querying_pipeline object whose #execute turns a raw query into
    #   a list of terms
    def initialize(indexing_pipeline, querying_pipeline)
      @indexing_pipeline = indexing_pipeline
      @querying_pipeline = querying_pipeline
      @documents = {}
      @inverted_index = {}
      @document_length_average = 0.0
    end

    # Indexes a document. Documents are simply Hashes with at least
    #
    #   {
    #     id: 'unique_id',
    #     indexed_field: 'text field',
    #     ...
    #   }
    #
    # A document whose id is already indexed is removed first, so indexing
    # the same id again replaces the previous version.
    #
    # @raise [KeyError] when :id or :indexed_field is missing
    # @return the document that was given
    def index(document)
      id = document.fetch(:id)
      remove(id) if @documents[id]

      terms = @indexing_pipeline.execute(document.fetch(:indexed_field))

      @documents[id] = {
        document: document,
        number_of_terms: terms.size.to_f
      }

      # One posting per distinct term, carrying that term's frequency value.
      terms.uniq.each do |term|
        @inverted_index[term] ||= []
        @inverted_index[term] << [
          document,
          { term: term, value: Tf.calculate(term, terms) }
        ]
      end

      calculate_document_length_average

      document
    end

    # Removes a document by id from the index and from the documents list.
    #
    # @return the removed entry ({ document:, number_of_terms: }), or nil
    #   when no document with that id is indexed
    def remove(id)
      document = @documents.dig(id, :document)
      return unless document

      # The terms are recomputed from the stored document to know which
      # posting lists have to be cleaned up.
      terms = @indexing_pipeline.execute(document.fetch(:indexed_field))

      terms.uniq.each do |term|
        postings = @inverted_index[term]
        # The stored document may have been mutated after indexing, in which
        # case a recomputed term can be absent from the index.
        next unless postings

        remaining = postings.reject do |indexed_document, _tf|
          indexed_document.fetch(:id) == id
        end

        if remaining.empty?
          @inverted_index.delete(term)
        else
          @inverted_index[term] = remaining
        end
      end

      removed_document = @documents.delete(id)

      calculate_document_length_average

      removed_document
    end

    # Searches the index.
    #
    # @param raw_terms raw query, processed by the querying pipeline
    # @param operator 'or' matches documents containing any term; any other
    #   value requires a document to match as many postings as there are
    #   processed terms (AND semantics)
    # @return [] when the query produces no terms; otherwise a Hash
    #   { documents: [{ document:, score: }, ...], idfs:, processed_terms: }
    #   with documents sorted by descending score
    def search(raw_terms, operator: 'or')
      processed_terms = @querying_pipeline.execute(raw_terms)

      return [] if processed_terms.empty?

      # gets the postings that match each term
      results_by_terms = processed_terms.map do |term|
        @inverted_index[term] || []
      end

      idfs = generate_idfs(processed_terms)

      # We flatten and group by document id
      any_term_matched_documents = results_by_terms.flatten(1).group_by do |document, _tf|
        document.fetch(:id)
      end

      # We select documents based on operator
      #   if operator AND
      #     we select only documents that matched all terms
      #   else
      #     we select everything
      operator_specific_matched_documents = any_term_matched_documents.select do |_document_id, document_and_tfs|
        match_terms_according_operator?(document_and_tfs, processed_terms, operator)
      end

      # map to a { document:, score: } structure.
      document_and_scores = operator_specific_matched_documents.map do |document_id, document_and_tfs|
        {
          document: @documents.dig(document_id, :document),
          score: calculate_score(@documents.fetch(document_id), document_and_tfs, idfs)
        }
      end

      # sort by score (highest first) and wrap in a more convenient structure.
      documents = document_and_scores.sort_by { |item| -item[:score] }

      { documents: documents, idfs: idfs, processed_terms: processed_terms }
    end

    # @return [Integer] number of indexed documents
    def size
      @documents.size
    end

    # @return [Hash] document count plus size and terms of the inverted index
    def stats
      {
        documents: @documents.size,
        inverted_index: {
          size: @inverted_index.size,
          terms: @inverted_index.keys
        }
      }
    end

    private

    # Recomputes the average number of terms per document. The stored
    # average is reset to 0.0 when the index becomes empty, so that no stale
    # value survives the removal of the last document.
    def calculate_document_length_average
      if @documents.empty?
        @document_length_average = 0.0
      else
        all_terms_size = @documents.values.sum { |entry| entry[:number_of_terms].to_f }
        @document_length_average = all_terms_size / @documents.size.to_f
      end
    end

    # With 'or' every matched document is accepted; otherwise the document
    # must have one posting per processed term.
    def match_terms_according_operator?(document_and_tfs, terms, operator)
      return true if operator == 'or'

      document_and_tfs.size == terms.size
    end

    # Sums the BM25 score of every matched posting of a document.
    # `document` is the @documents entry ({ document:, number_of_terms: }).
    def calculate_score(document, document_and_tfs, idfs)
      terms_scores = document_and_tfs.map do |_document, tf|
        Bm25.calculate(
          tf: tf.fetch(:value),
          idf: idfs[tf.fetch(:term)],
          document_length: document[:number_of_terms],
          document_length_average: @document_length_average
        )
      end

      terms_scores.reduce(&:+)
    end

    # Builds { term => idf } for the processed terms present in the index.
    # Terms without postings are left out.
    def generate_idfs(processed_terms)
      processed_terms.each_with_object({}) do |term, idfs|
        postings = @inverted_index[term]
        next if postings.nil? || postings.empty?

        idfs[term] = Idf.calculate(postings.size, @documents.size)
      end
    end
  end
end