jekyll_ranked_search 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 78ba1763b6b2bca798b128851cd24a67c5f8f47b980d9201c8045d33b40834e1
4
+ data.tar.gz: 0babe4848299d150103574360b54117207858f6a2171cd46dfc4c48fb971e041
5
+ SHA512:
6
+ metadata.gz: 729b789788706222be8f96680bd7a6d7839eccbd5eb66f04a1e8956e176e49c123c085ea2b52c2bdec32521904a788a1ea2f31a3b5443bf621a760e76960479a
7
+ data.tar.gz: 227866c90b5e664291d7a13a0da968e5e2c3b0c07972ec7ab20d574b0a0a25d9b1094b1202962c7b46f216d83e56b8646ad283b5af5c21574be84295180f4e88
data/lib/jekyll_ranked_search.rb ADDED
@@ -0,0 +1,157 @@
1
+ require "set"
2
+ require "redcarpet"
3
+ require "redcarpet/render_strip"
4
+
5
+
6
# Redcarpet renderer that produces plain text.
# Builds on the StripDown renderer and additionally reduces every link to
# its visible text, dropping the target URL and the title.
class MarkdownRenderer < Redcarpet::Render::StripDown
  # Render callback for a link: only the link text (+content+) is kept,
  # the URL and the title are ignored.
  def link(_link, _title, content)
    content
  end
end
13
+
14
+
15
# Jekyll plugin to generate a TF-IDF search index for posts.
#
# On every build it indexes the site's posts, stores the index as JSON
# strings in site.data (keys 'docs', 'word2doc', 'bow' and 'tfidf') and
# appends the search.json and js/search.js pages to the site.
class TfidfConverter < Jekyll::Generator
  # Number of characters of the rendered post text kept as preview text.
  EXCERPT_LENGTH = 512

  # Score multiplier applied to the last document of the list. The first
  # document gets 1.0; documents in between are scaled exponentially.
  MAX_RECENCY_BOOST = 1.2

  # Small constant added inside the logarithm of the IDF term.
  IDF_SMOOTHING = 0.00001

  # Generator entry point: builds the index and registers the two search
  # pages on the site.
  #
  # site - the Jekyll site being built.
  def generate(site)
    Jekyll.logger.info "Jekyll Ranked Search: Generating search index"

    generate_index(site, site.posts.docs)
    site.pages << search_json(site)
    site.pages << search_js(site)

    Jekyll.logger.info "Jekyll Ranked Search: Done"
  end

  # Generate search index, calculate tfidf values.
  #
  # site - the Jekyll site; the results are written to site.data.
  # docs - list of posts. Each must respond to #content, #url and #data
  #        (with 'title' and a 'date' that responds to #strftime).
  #        NOTE(review): the recency boost assumes docs are ordered from
  #        oldest to newest - confirm against site.posts.docs.
  #
  # Writes four JSON strings into site.data:
  #   'docs'     - array of { title, url, date, text } per post
  #   'word2doc' - term_id => list of document indices containing the term
  #   'bow'      - word => term_id (bag of words)
  #   'tfidf'    - "term_id,doc_id" => boosted TF-IDF score
  #
  # Returns the JSON string stored under 'tfidf'.
  def generate_index(site, docs)
    # All docs
    processed_docs = []
    # Map of term_id to the set of documents containing that term
    word2doc = {}
    # Bag of words, assigns word to index
    bow = {}
    # Term frequency per (term_id, doc_id) pair. Only pairs that actually
    # occur are stored (sparse) to save disk space and memory on the
    # receiving end.
    tf = Hash.new(0)
    # Total number of documents
    total_docs = docs.length

    # Markdown parser
    markdown = Redcarpet::Markdown.new(MarkdownRenderer)

    # Create vocabulary
    docs.each_with_index do |post, idx|
      content = render_excerpt(markdown, post.content)

      processed_docs.push({
        title: post.data['title'],
        url: post.url,
        date: post.data['date'].strftime("%FT%T%z"),
        text: content,
      })

      tokenize_words("#{post.data['title']} #{content}").each do |word|
        # New words get the next free index as their term_id.
        term_id = (bow[word] ||= bow.length)

        (word2doc[term_id] ||= Set.new) << idx
        tf[[term_id, idx]] += 1
      end
    end

    # Document frequency: number of distinct documents containing a term.
    # Derived from the per-term document sets so that a word repeated
    # inside one post is counted once, not once per occurrence.
    df = word2doc.transform_values(&:size)

    # Save in site data object for access in templates
    # (sets are converted to arrays for the JSON output).
    site.data['docs'] = processed_docs.to_json
    site.data['word2doc'] = word2doc.transform_values(&:to_a).to_json
    site.data['bow'] = bow.to_json

    # Highest document index, but at least 1 so that a site with a single
    # post does not divide by zero.
    last_idx = [total_docs - 1, 1].max

    # Calculate tf-idf for each document in the shape term_id,doc_id = tfidf
    tfidf = {}
    tf.each do |(term_id, doc_id), freq|
      # Float division: integer division would truncate the ratio.
      idf = Math.log(total_docs.to_f / df[term_id] + IDF_SMOOTHING)

      # Exponential boost by position: 1.0 for the first document up to
      # MAX_RECENCY_BOOST (20%) for the last one.
      boost = MAX_RECENCY_BOOST**(doc_id.to_f / last_idx)

      tfidf["#{term_id},#{doc_id}"] = (freq * idf * boost).round(4)
    end

    site.data['tfidf'] = tfidf.to_json
  end

  # Split a text into lowercase search tokens.
  #
  # The text is split on whitespace, stopwords are dropped and every
  # character other than a-z and 0-9 is removed from the remaining words.
  # Stopwords are checked both before and after the character removal, so
  # that "don't" (listed with apostrophe) as well as "the," (stopword
  # followed by punctuation) are filtered. Words that end up empty, such
  # as a lone bullet point, are dropped too.
  #
  # NOTE(review): non-ASCII letters are removed as well ("café" => "caf");
  # presumably the client-side tokenizer does the same - verify before
  # changing the pattern.
  #
  # doc - the String to tokenize.
  #
  # Returns an Array of Strings.
  def tokenize_words(doc)
    # TODO: Better tokenization
    @stopwords ||= load_stopwords
    words = doc.strip.downcase.split
    words.reject! { |word| @stopwords.include?(word) }

    # Remove special characters (anywhere in the word)
    words.map! { |word| word.gsub(/[^a-z0-9\s]/i, '') }

    words.reject! { |word| word.empty? || @stopwords.include?(word) }
    words
  end

  # Load stopwords from the stopwords.txt file next to this source file.
  # Each line is one stopword; surrounding whitespace is stripped.
  #
  # Returns a Set of Strings.
  def load_stopwords
    Jekyll.logger.info "Loading stopwords"
    path = File.join(File.dirname(__FILE__), "stopwords.txt")
    stopwords = Set.new(File.readlines(path, encoding: "UTF-8").map(&:strip))
    Jekyll.logger.info "Done loading #{stopwords.length} stopwords"
    stopwords
  end

  # Create search.json from template and return as Jekyll Page object
  def search_json(site)
    template_page(site, "search.json", "search.json")
  end

  # Create search.js from template and return as Jekyll Page object
  #
  # NOTE(review): File.read raises Errno::ENOENT when search.js is not
  # present next to this file - verify that the gem's file list ships it.
  def search_js(site)
    template_page(site, "search.js", "js/search.js")
  end

  private

  # Render a post body to plain text and cut it down to the preview length.
  #
  # markdown - the Redcarpet::Markdown instance to render with.
  # raw      - the post's Markdown source.
  #
  # Returns a String of at most EXCERPT_LENGTH characters.
  def render_excerpt(markdown, raw)
    text = markdown.render(raw)
    # Replace newlines with wide spaces and bullet points
    # TODO: Remove trailing bullet point
    text = text.gsub(/\n/, ' • ')
    # TODO: Use first n words instead of characters
    markdown.render(text)[0, EXCERPT_LENGTH]
  end

  # Build an in-memory page whose content is a template file that lives
  # next to this source file.
  #
  # site          - the Jekyll site the page belongs to.
  # template_name - file name of the template to read.
  # page_name     - name (relative path) of the generated page.
  #
  # Returns a Jekyll::PageWithoutAFile.
  def template_page(site, template_name, page_name)
    template = File.read(File.join(File.dirname(__FILE__), template_name))
    Jekyll::PageWithoutAFile.new(site, __dir__, "", page_name).tap do |page|
      page.content = template
    end
  end
end
data/lib/search.json ADDED
@@ -0,0 +1,6 @@
1
+ {
2
+ "word2doc": {{site.data.word2doc}},
3
+ "bow": {{site.data.bow}},
4
+ "tfidf": {{site.data.tfidf}},
5
+ "docs": {{site.data.docs}}
6
+ }
data/lib/stopwords.txt ADDED
@@ -0,0 +1,1298 @@
1
+ 'll
2
+ 'tis
3
+ 'twas
4
+ 've
5
+ 10
6
+ 39
7
+ a
8
+ a's
9
+ able
10
+ ableabout
11
+ about
12
+ above
13
+ abroad
14
+ abst
15
+ accordance
16
+ according
17
+ accordingly
18
+ across
19
+ act
20
+ actually
21
+ ad
22
+ added
23
+ adj
24
+ adopted
25
+ ae
26
+ af
27
+ affected
28
+ affecting
29
+ affects
30
+ after
31
+ afterwards
32
+ ag
33
+ again
34
+ against
35
+ ago
36
+ ah
37
+ ahead
38
+ ai
39
+ ain't
40
+ aint
41
+ al
42
+ all
43
+ allow
44
+ allows
45
+ almost
46
+ alone
47
+ along
48
+ alongside
49
+ already
50
+ also
51
+ although
52
+ always
53
+ am
54
+ amid
55
+ amidst
56
+ among
57
+ amongst
58
+ amoungst
59
+ amount
60
+ an
61
+ and
62
+ announce
63
+ another
64
+ any
65
+ anybody
66
+ anyhow
67
+ anymore
68
+ anyone
69
+ anything
70
+ anyway
71
+ anyways
72
+ anywhere
73
+ ao
74
+ apart
75
+ apparently
76
+ appear
77
+ appreciate
78
+ appropriate
79
+ approximately
80
+ aq
81
+ ar
82
+ are
83
+ area
84
+ areas
85
+ aren
86
+ aren't
87
+ arent
88
+ arise
89
+ around
90
+ arpa
91
+ as
92
+ aside
93
+ ask
94
+ asked
95
+ asking
96
+ asks
97
+ associated
98
+ at
99
+ au
100
+ auth
101
+ available
102
+ aw
103
+ away
104
+ awfully
105
+ az
106
+ b
107
+ ba
108
+ back
109
+ backed
110
+ backing
111
+ backs
112
+ backward
113
+ backwards
114
+ bb
115
+ bd
116
+ be
117
+ became
118
+ because
119
+ become
120
+ becomes
121
+ becoming
122
+ been
123
+ before
124
+ beforehand
125
+ began
126
+ begin
127
+ beginning
128
+ beginnings
129
+ begins
130
+ behind
131
+ being
132
+ beings
133
+ believe
134
+ below
135
+ beside
136
+ besides
137
+ best
138
+ better
139
+ between
140
+ beyond
141
+ bf
142
+ bg
143
+ bh
144
+ bi
145
+ big
146
+ bill
147
+ billion
148
+ biol
149
+ bj
150
+ bm
151
+ bn
152
+ bo
153
+ both
154
+ bottom
155
+ br
156
+ brief
157
+ briefly
158
+ bs
159
+ bt
160
+ but
161
+ buy
162
+ bv
163
+ bw
164
+ by
165
+ bz
166
+ c
167
+ c'mon
168
+ c's
169
+ ca
170
+ call
171
+ came
172
+ can
173
+ can't
174
+ cannot
175
+ cant
176
+ caption
177
+ case
178
+ cases
179
+ cause
180
+ causes
181
+ cc
182
+ cd
183
+ certain
184
+ certainly
185
+ cf
186
+ cg
187
+ ch
188
+ changes
189
+ ci
190
+ ck
191
+ cl
192
+ clear
193
+ clearly
194
+ click
195
+ cm
196
+ cmon
197
+ cn
198
+ co
199
+ co.
200
+ com
201
+ come
202
+ comes
203
+ computer
204
+ con
205
+ concerning
206
+ consequently
207
+ consider
208
+ considering
209
+ contain
210
+ containing
211
+ contains
212
+ copy
213
+ corresponding
214
+ could
215
+ could've
216
+ couldn
217
+ couldn't
218
+ couldnt
219
+ course
220
+ cr
221
+ cry
222
+ cs
223
+ cu
224
+ currently
225
+ cv
226
+ cx
227
+ cy
228
+ cz
229
+ d
230
+ dare
231
+ daren't
232
+ darent
233
+ date
234
+ de
235
+ dear
236
+ definitely
237
+ describe
238
+ described
239
+ despite
240
+ detail
241
+ did
242
+ didn
243
+ didn't
244
+ didnt
245
+ differ
246
+ different
247
+ differently
248
+ directly
249
+ dj
250
+ dk
251
+ dm
252
+ do
253
+ does
254
+ doesn
255
+ doesn't
256
+ doesnt
257
+ doing
258
+ don
259
+ don't
260
+ done
261
+ dont
262
+ doubtful
263
+ down
264
+ downed
265
+ downing
266
+ downs
267
+ downwards
268
+ due
269
+ during
270
+ dz
271
+ e
272
+ each
273
+ early
274
+ ec
275
+ ed
276
+ edu
277
+ ee
278
+ effect
279
+ eg
280
+ eh
281
+ eight
282
+ eighty
283
+ either
284
+ eleven
285
+ else
286
+ elsewhere
287
+ empty
288
+ end
289
+ ended
290
+ ending
291
+ ends
292
+ enough
293
+ entirely
294
+ er
295
+ es
296
+ especially
297
+ et
298
+ et-al
299
+ etc
300
+ even
301
+ evenly
302
+ ever
303
+ evermore
304
+ every
305
+ everybody
306
+ everyone
307
+ everything
308
+ everywhere
309
+ ex
310
+ exactly
311
+ example
312
+ except
313
+ f
314
+ face
315
+ faces
316
+ fact
317
+ facts
318
+ fairly
319
+ far
320
+ farther
321
+ felt
322
+ few
323
+ fewer
324
+ ff
325
+ fi
326
+ fifteen
327
+ fifth
328
+ fifty
329
+ fify
330
+ fill
331
+ find
332
+ finds
333
+ fire
334
+ first
335
+ five
336
+ fix
337
+ fj
338
+ fk
339
+ fm
340
+ fo
341
+ followed
342
+ following
343
+ follows
344
+ for
345
+ forever
346
+ former
347
+ formerly
348
+ forth
349
+ forty
350
+ forward
351
+ found
352
+ four
353
+ fr
354
+ free
355
+ from
356
+ front
357
+ full
358
+ fully
359
+ further
360
+ furthered
361
+ furthering
362
+ furthermore
363
+ furthers
364
+ fx
365
+ g
366
+ ga
367
+ gave
368
+ gb
369
+ gd
370
+ ge
371
+ general
372
+ generally
373
+ get
374
+ gets
375
+ getting
376
+ gf
377
+ gg
378
+ gh
379
+ gi
380
+ give
381
+ given
382
+ gives
383
+ giving
384
+ gl
385
+ gm
386
+ gmt
387
+ gn
388
+ go
389
+ goes
390
+ going
391
+ gone
392
+ good
393
+ goods
394
+ got
395
+ gotten
396
+ gov
397
+ gp
398
+ gq
399
+ gr
400
+ great
401
+ greater
402
+ greatest
403
+ greetings
404
+ group
405
+ grouped
406
+ grouping
407
+ groups
408
+ gs
409
+ gt
410
+ gu
411
+ gw
412
+ gy
413
+ h
414
+ had
415
+ hadn't
416
+ hadnt
417
+ half
418
+ happens
419
+ hardly
420
+ has
421
+ hasn
422
+ hasn't
423
+ hasnt
424
+ have
425
+ haven
426
+ haven't
427
+ havent
428
+ having
429
+ he
430
+ he'd
431
+ he'll
432
+ he's
433
+ hed
434
+ hell
435
+ hello
436
+ help
437
+ hence
438
+ her
439
+ here
440
+ here's
441
+ hereafter
442
+ hereby
443
+ herein
444
+ heres
445
+ hereupon
446
+ hers
447
+ herself
448
+ herse”
449
+ hes
450
+ hi
451
+ hid
452
+ high
453
+ higher
454
+ highest
455
+ him
456
+ himself
457
+ himse”
458
+ his
459
+ hither
460
+ hk
461
+ hm
462
+ hn
463
+ home
464
+ homepage
465
+ hopefully
466
+ how
467
+ how'd
468
+ how'll
469
+ how's
470
+ howbeit
471
+ however
472
+ hr
473
+ ht
474
+ htm
475
+ html
476
+ http
477
+ hu
478
+ hundred
479
+ i
480
+ i'd
481
+ i'll
482
+ i'm
483
+ i've
484
+ i.e.
485
+ id
486
+ ie
487
+ if
488
+ ignored
489
+ ii
490
+ il
491
+ ill
492
+ im
493
+ immediate
494
+ immediately
495
+ importance
496
+ important
497
+ in
498
+ inasmuch
499
+ inc
500
+ inc.
501
+ indeed
502
+ index
503
+ indicate
504
+ indicated
505
+ indicates
506
+ information
507
+ inner
508
+ inside
509
+ insofar
510
+ instead
511
+ int
512
+ interest
513
+ interested
514
+ interesting
515
+ interests
516
+ into
517
+ invention
518
+ inward
519
+ io
520
+ iq
521
+ ir
522
+ is
523
+ isn
524
+ isn't
525
+ isnt
526
+ it
527
+ it'd
528
+ it'll
529
+ it's
530
+ itd
531
+ itll
532
+ its
533
+ itself
534
+ itse”
535
+ ive
536
+ j
537
+ je
538
+ jm
539
+ jo
540
+ join
541
+ jp
542
+ just
543
+ k
544
+ ke
545
+ keep
546
+ keeps
547
+ kept
548
+ keys
549
+ kg
550
+ kh
551
+ ki
552
+ kind
553
+ km
554
+ kn
555
+ knew
556
+ know
557
+ known
558
+ knows
559
+ kp
560
+ kr
561
+ kw
562
+ ky
563
+ kz
564
+ l
565
+ la
566
+ large
567
+ largely
568
+ last
569
+ lately
570
+ later
571
+ latest
572
+ latter
573
+ latterly
574
+ lb
575
+ lc
576
+ least
577
+ length
578
+ less
579
+ lest
580
+ let
581
+ let's
582
+ lets
583
+ li
584
+ like
585
+ liked
586
+ likely
587
+ likewise
588
+ line
589
+ little
590
+ lk
591
+ ll
592
+ long
593
+ longer
594
+ longest
595
+ look
596
+ looking
597
+ looks
598
+ low
599
+ lower
600
+ lr
601
+ ls
602
+ lt
603
+ ltd
604
+ lu
605
+ lv
606
+ ly
607
+ m
608
+ ma
609
+ made
610
+ mainly
611
+ make
612
+ makes
613
+ making
614
+ man
615
+ many
616
+ may
617
+ maybe
618
+ mayn't
619
+ maynt
620
+ mc
621
+ md
622
+ me
623
+ mean
624
+ means
625
+ meantime
626
+ meanwhile
627
+ member
628
+ members
629
+ men
630
+ merely
631
+ mg
632
+ mh
633
+ microsoft
634
+ might
635
+ might've
636
+ mightn't
637
+ mightnt
638
+ mil
639
+ mill
640
+ million
641
+ mine
642
+ minus
643
+ miss
644
+ mk
645
+ ml
646
+ mm
647
+ mn
648
+ mo
649
+ more
650
+ moreover
651
+ most
652
+ mostly
653
+ move
654
+ mp
655
+ mq
656
+ mr
657
+ mrs
658
+ ms
659
+ msie
660
+ mt
661
+ mu
662
+ much
663
+ mug
664
+ must
665
+ must've
666
+ mustn't
667
+ mustnt
668
+ mv
669
+ mw
670
+ mx
671
+ my
672
+ myself
673
+ myse”
674
+ mz
675
+ n
676
+ na
677
+ name
678
+ namely
679
+ nay
680
+ nc
681
+ nd
682
+ ne
683
+ near
684
+ nearly
685
+ necessarily
686
+ necessary
687
+ need
688
+ needed
689
+ needing
690
+ needn't
691
+ neednt
692
+ needs
693
+ neither
694
+ net
695
+ netscape
696
+ never
697
+ neverf
698
+ neverless
699
+ nevertheless
700
+ new
701
+ newer
702
+ newest
703
+ next
704
+ nf
705
+ ng
706
+ ni
707
+ nine
708
+ ninety
709
+ nl
710
+ no
711
+ no-one
712
+ nobody
713
+ non
714
+ none
715
+ nonetheless
716
+ noone
717
+ nor
718
+ normally
719
+ nos
720
+ not
721
+ noted
722
+ nothing
723
+ notwithstanding
724
+ novel
725
+ now
726
+ nowhere
727
+ np
728
+ nr
729
+ nu
730
+ null
731
+ number
732
+ numbers
733
+ nz
734
+ o
735
+ obtain
736
+ obtained
737
+ obviously
738
+ of
739
+ off
740
+ often
741
+ oh
742
+ ok
743
+ okay
744
+ old
745
+ older
746
+ oldest
747
+ om
748
+ omitted
749
+ on
750
+ once
751
+ one
752
+ one's
753
+ ones
754
+ only
755
+ onto
756
+ open
757
+ opened
758
+ opening
759
+ opens
760
+ opposite
761
+ or
762
+ ord
763
+ order
764
+ ordered
765
+ ordering
766
+ orders
767
+ org
768
+ other
769
+ others
770
+ otherwise
771
+ ought
772
+ oughtn't
773
+ oughtnt
774
+ our
775
+ ours
776
+ ourselves
777
+ out
778
+ outside
779
+ over
780
+ overall
781
+ owing
782
+ own
783
+ p
784
+ pa
785
+ page
786
+ pages
787
+ part
788
+ parted
789
+ particular
790
+ particularly
791
+ parting
792
+ parts
793
+ past
794
+ pe
795
+ per
796
+ perhaps
797
+ pf
798
+ pg
799
+ ph
800
+ pk
801
+ pl
802
+ place
803
+ placed
804
+ places
805
+ please
806
+ plus
807
+ pm
808
+ pmid
809
+ pn
810
+ point
811
+ pointed
812
+ pointing
813
+ points
814
+ poorly
815
+ possible
816
+ possibly
817
+ potentially
818
+ pp
819
+ pr
820
+ predominantly
821
+ present
822
+ presented
823
+ presenting
824
+ presents
825
+ presumably
826
+ previously
827
+ primarily
828
+ probably
829
+ problem
830
+ problems
831
+ promptly
832
+ proud
833
+ provided
834
+ provides
835
+ pt
836
+ put
837
+ puts
838
+ pw
839
+ py
840
+ q
841
+ qa
842
+ que
843
+ quickly
844
+ quite
845
+ qv
846
+ r
847
+ ran
848
+ rather
849
+ rd
850
+ re
851
+ readily
852
+ really
853
+ reasonably
854
+ recent
855
+ recently
856
+ ref
857
+ refs
858
+ regarding
859
+ regardless
860
+ regards
861
+ related
862
+ relatively
863
+ research
864
+ reserved
865
+ respectively
866
+ resulted
867
+ resulting
868
+ results
869
+ right
870
+ ring
871
+ ro
872
+ room
873
+ rooms
874
+ round
875
+ ru
876
+ run
877
+ rw
878
+ s
879
+ sa
880
+ said
881
+ same
882
+ saw
883
+ say
884
+ saying
885
+ says
886
+ sb
887
+ sc
888
+ sd
889
+ se
890
+ sec
891
+ second
892
+ secondly
893
+ seconds
894
+ section
895
+ see
896
+ seeing
897
+ seem
898
+ seemed
899
+ seeming
900
+ seems
901
+ seen
902
+ sees
903
+ self
904
+ selves
905
+ sensible
906
+ sent
907
+ serious
908
+ seriously
909
+ seven
910
+ seventy
911
+ several
912
+ sg
913
+ sh
914
+ shall
915
+ shan't
916
+ shant
917
+ she
918
+ she'd
919
+ she'll
920
+ she's
921
+ shed
922
+ shell
923
+ shes
924
+ should
925
+ should've
926
+ shouldn
927
+ shouldn't
928
+ shouldnt
929
+ show
930
+ showed
931
+ showing
932
+ shown
933
+ showns
934
+ shows
935
+ si
936
+ side
937
+ sides
938
+ significant
939
+ significantly
940
+ similar
941
+ similarly
942
+ since
943
+ sincere
944
+ site
945
+ six
946
+ sixty
947
+ sj
948
+ sk
949
+ sl
950
+ slightly
951
+ sm
952
+ small
953
+ smaller
954
+ smallest
955
+ sn
956
+ so
957
+ some
958
+ somebody
959
+ someday
960
+ somehow
961
+ someone
962
+ somethan
963
+ something
964
+ sometime
965
+ sometimes
966
+ somewhat
967
+ somewhere
968
+ soon
969
+ sorry
970
+ specifically
971
+ specified
972
+ specify
973
+ specifying
974
+ sr
975
+ st
976
+ state
977
+ states
978
+ still
979
+ stop
980
+ strongly
981
+ su
982
+ sub
983
+ substantially
984
+ successfully
985
+ such
986
+ sufficiently
987
+ suggest
988
+ sup
989
+ sure
990
+ sv
991
+ sy
992
+ system
993
+ sz
994
+ t
995
+ t's
996
+ take
997
+ taken
998
+ taking
999
+ tc
1000
+ td
1001
+ tell
1002
+ ten
1003
+ tends
1004
+ test
1005
+ text
1006
+ tf
1007
+ tg
1008
+ th
1009
+ than
1010
+ thank
1011
+ thanks
1012
+ thanx
1013
+ that
1014
+ that'll
1015
+ that's
1016
+ that've
1017
+ thatll
1018
+ thats
1019
+ thatve
1020
+ the
1021
+ their
1022
+ theirs
1023
+ them
1024
+ themselves
1025
+ then
1026
+ thence
1027
+ there
1028
+ there'd
1029
+ there'll
1030
+ there're
1031
+ there's
1032
+ there've
1033
+ thereafter
1034
+ thereby
1035
+ thered
1036
+ therefore
1037
+ therein
1038
+ therell
1039
+ thereof
1040
+ therere
1041
+ theres
1042
+ thereto
1043
+ thereupon
1044
+ thereve
1045
+ these
1046
+ they
1047
+ they'd
1048
+ they'll
1049
+ they're
1050
+ they've
1051
+ theyd
1052
+ theyll
1053
+ theyre
1054
+ theyve
1055
+ thick
1056
+ thin
1057
+ thing
1058
+ things
1059
+ think
1060
+ thinks
1061
+ third
1062
+ thirty
1063
+ this
1064
+ thorough
1065
+ thoroughly
1066
+ those
1067
+ thou
1068
+ though
1069
+ thoughh
1070
+ thought
1071
+ thoughts
1072
+ thousand
1073
+ three
1074
+ throug
1075
+ through
1076
+ throughout
1077
+ thru
1078
+ thus
1079
+ til
1080
+ till
1081
+ tip
1082
+ tis
1083
+ tj
1084
+ tk
1085
+ tm
1086
+ tn
1087
+ to
1088
+ today
1089
+ together
1090
+ too
1091
+ took
1092
+ top
1093
+ toward
1094
+ towards
1095
+ tp
1096
+ tr
1097
+ tried
1098
+ tries
1099
+ trillion
1100
+ truly
1101
+ try
1102
+ trying
1103
+ ts
1104
+ tt
1105
+ turn
1106
+ turned
1107
+ turning
1108
+ turns
1109
+ tv
1110
+ tw
1111
+ twas
1112
+ twelve
1113
+ twenty
1114
+ twice
1115
+ two
1116
+ tz
1117
+ u
1118
+ ua
1119
+ ug
1120
+ uk
1121
+ um
1122
+ un
1123
+ under
1124
+ underneath
1125
+ undoing
1126
+ unfortunately
1127
+ unless
1128
+ unlike
1129
+ unlikely
1130
+ until
1131
+ unto
1132
+ up
1133
+ upon
1134
+ ups
1135
+ upwards
1136
+ us
1137
+ use
1138
+ used
1139
+ useful
1140
+ usefully
1141
+ usefulness
1142
+ uses
1143
+ using
1144
+ usually
1145
+ uucp
1146
+ uy
1147
+ uz
1148
+ v
1149
+ va
1150
+ value
1151
+ various
1152
+ vc
1153
+ ve
1154
+ versus
1155
+ very
1156
+ vg
1157
+ vi
1158
+ via
1159
+ viz
1160
+ vn
1161
+ vol
1162
+ vols
1163
+ vs
1164
+ vu
1165
+ w
1166
+ want
1167
+ wanted
1168
+ wanting
1169
+ wants
1170
+ was
1171
+ wasn
1172
+ wasn't
1173
+ wasnt
1174
+ way
1175
+ ways
1176
+ we
1177
+ we'd
1178
+ we'll
1179
+ we're
1180
+ we've
1181
+ web
1182
+ webpage
1183
+ website
1184
+ wed
1185
+ welcome
1186
+ well
1187
+ wells
1188
+ went
1189
+ were
1190
+ weren
1191
+ weren't
1192
+ werent
1193
+ weve
1194
+ wf
1195
+ what
1196
+ what'd
1197
+ what'll
1198
+ what's
1199
+ what've
1200
+ whatever
1201
+ whatll
1202
+ whats
1203
+ whatve
1204
+ when
1205
+ when'd
1206
+ when'll
1207
+ when's
1208
+ whence
1209
+ whenever
1210
+ where
1211
+ where'd
1212
+ where'll
1213
+ where's
1214
+ whereafter
1215
+ whereas
1216
+ whereby
1217
+ wherein
1218
+ wheres
1219
+ whereupon
1220
+ wherever
1221
+ whether
1222
+ which
1223
+ whichever
1224
+ while
1225
+ whilst
1226
+ whim
1227
+ whither
1228
+ who
1229
+ who'd
1230
+ who'll
1231
+ who's
1232
+ whod
1233
+ whoever
1234
+ whole
1235
+ wholl
1236
+ whom
1237
+ whomever
1238
+ whos
1239
+ whose
1240
+ why
1241
+ why'd
1242
+ why'll
1243
+ why's
1244
+ widely
1245
+ width
1246
+ will
1247
+ willing
1248
+ wish
1249
+ with
1250
+ within
1251
+ without
1252
+ won
1253
+ won't
1254
+ wonder
1255
+ wont
1256
+ words
1257
+ work
1258
+ worked
1259
+ working
1260
+ works
1261
+ world
1262
+ would
1263
+ would've
1264
+ wouldn
1265
+ wouldn't
1266
+ wouldnt
1267
+ ws
1268
+ www
1269
+ x
1270
+ y
1271
+ ye
1272
+ year
1273
+ years
1274
+ yes
1275
+ yet
1276
+ you
1277
+ you'd
1278
+ you'll
1279
+ you're
1280
+ you've
1281
+ youd
1282
+ youll
1283
+ young
1284
+ younger
1285
+ youngest
1286
+ your
1287
+ youre
1288
+ yours
1289
+ yourself
1290
+ yourselves
1291
+ youve
1292
+ yt
1293
+ yu
1294
+ z
1295
+ za
1296
+ zero
1297
+ zm
1298
+ zr
metadata ADDED
@@ -0,0 +1,59 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jekyll_ranked_search
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Friedrich Ewald
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-06-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: redcarpet
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3.6'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.6'
27
+ description: Search for Jekyll posts using TF-IDF
28
+ email: freddiemailster@gmail.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - lib/jekyll_ranked_search.rb
34
+ - lib/search.json
35
+ - lib/stopwords.txt
36
+ homepage: https://github.com/f-ewald/jekyll_ranked_search
37
+ licenses:
38
+ - MIT
39
+ metadata: {}
40
+ post_install_message:
41
+ rdoc_options: []
42
+ require_paths:
43
+ - lib
44
+ required_ruby_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ required_rubygems_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ requirements: []
55
+ rubygems_version: 3.4.13
56
+ signing_key:
57
+ specification_version: 4
58
+ summary: TF-IDF search for Jekyll posts
59
+ test_files: []