sclust 1.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,96 @@
1
+ #
2
+ # The MIT License
3
+ #
4
+ # Copyright (c) 2010 Samuel R. Baskinger
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ # of this software and associated documentation files (the "Software"), to deal
8
+ # in the Software without restriction, including without limitation the rights
9
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ # copies of the Software, and to permit persons to whom the Software is
11
+ # furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in
14
+ # all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ # THE SOFTWARE.
23
+ #
24
+
25
module SClust

  module Util

    # A Hash that never physically stores its default value.
    #
    # Assigning the default value to a key removes that key (if present)
    # instead of storing it, and reading a key that is not present yields
    # the default value. Only values that differ from the default occupy
    # an entry in the underlying Hash.
    class SparseVector < Hash

      # default_value:: the value reported for absent keys and the value
      #                 that is never stored explicitly (nil by default).
      def initialize(default_value=nil)
        super(default_value)
        @default_value = default_value
      end

      # Stores value under key unless value equals the default value, in
      # which case any existing entry for key is removed instead.
      #
      # Returns value in both cases.
      def store(key, value)
        if @default_value == value
          # Goes through #delete so that subclasses overriding #delete get a
          # chance to drop their own bookkeeping for this key.
          delete(key) if member?(key)
          value
        else
          super(key, value)
        end
      end

      # Returns the value stored under key, or the default value when the
      # key is not present.
      def [](key)
        if has_key?(key)
          super(key)
        else
          @default_value
        end
      end

      # NOTE: alias binds []= to *this* class's two-argument #store; a
      # subclass that overrides #store does not change what []= calls.
      alias []= store

    end

    # A SparseVector whose keys may additionally carry a user-defined label.
    # The key <-> label association is tracked in two plain Hashes that are
    # exposed through #key_map and #label_map.
    class SparseLabeledVector < SparseVector

      # Map keys to the user-defined label.
      attr_reader :key_map

      # Map labels to the key the data is stored under.
      attr_reader :label_map

      # default_value:: see SparseVector#initialize.
      def initialize(default_value=nil)
        super(default_value)
        @label_map = {}
        @key_map = {}
      end

      # Stores the (key, value) pair as SparseVector#store does, but accepts
      # an optional third argument which labels the key. When a label is
      # given (and is neither nil nor false) it is recorded as
      # label_map[label] => key and key_map[key] => label.
      #
      # The label is recorded even when value equals the default value and
      # therefore no entry is physically stored for key.
      #
      # The []= operator is inherited from SparseVector and only performs
      # the two-argument store; use this method directly to attach a label.
      #
      # Returns value, like Hash#store and SparseVector#store.
      def store(key, value, label=nil)
        super(key, value)

        if label
          @label_map[label] = key
          @key_map[key] = label
        end

        value
      end

      # Removes key from the vector and forgets the label attached to it.
      #
      # The label bookkeeping is dropped regardless of the deleted value, so
      # a stored false/nil does not leave stale entries behind. The
      # label_map entry is only removed while it still points at key; a
      # label that has since been re-used for another key is left alone.
      #
      # Returns whatever Hash#delete returns: the deleted value, or nil (or
      # the result of the given block) when key was not present.
      def delete(key)
        removed = super(key)

        label = @key_map.delete(key)
        @label_map.delete(label) if label && @label_map[label] == key

        removed
      end
    end

  end
end
@@ -0,0 +1,1149 @@
1
module SClust
  module Util
    # Holds a list of English stopwords: common function words,
    # contractions, and the single letters a-z.
    module StopwordList
      # Array of lower-case stopword Strings, in alphabetical groups.
      #
      # Kept as a class variable (and left unfrozen) because that is the
      # only handle this module exposes; presumably consumers include the
      # module and read @@stopword_list directly -- verify against callers.
      #
      # Every word appears exactly once.
      @@stopword_list = %w[
        a a's able about above according accordingly across actually after
        afterwards again against ain't all allow allows almost alone along
        already also although always am among amongst an and another any
        anybody anyhow anyone anything anyway anyways anywhere apart appear
        appreciate appropriate are aren't around as aside ask asking
        associated at available away awfully

        b be became because become becomes becoming been before beforehand
        behind being believe below beside besides best better between beyond
        both brief but by

        c c'mon c's came can can't cannot cant cause causes certain certainly
        changes clearly co com come comes concerning consequently consider
        considering contain containing contains corresponding could couldn't
        course currently

        d definitely described despite did didn't different do does doesn't
        doing don't done down downwards during

        e each edu eg eight either else elsewhere enough entirely especially
        et etc even ever every everybody everyone everything everywhere ex
        exactly example except

        f far few fifth first five followed following follows for former
        formerly forth four from further furthermore

        g get gets getting given gives go goes going gone got gotten
        greetings

        h had hadn't happens hardly has hasn't have haven't having he he's
        hello help hence her here here's hereafter hereby herein hereupon
        hers herself hi him himself his hither hopefully how howbeit however

        i i'd i'll i'm i've ie if ignored immediate in inasmuch inc indeed
        indicate indicated indicates inner insofar instead into inward is
        isn't it it'd it'll it's its itself

        j just

        k keep keeps kept know knows known

        l last lately later latter latterly least less lest let let's like
        liked likely little look looking looks ltd

        m mainly many may maybe me mean meanwhile merely might more moreover
        most mostly much must my myself

        n name namely nd near nearly necessary need needs neither never
        nevertheless new next nine no nobody non none noone nor normally not
        nothing novel now nowhere

        o obviously of off often oh ok okay old on once one ones only onto or
        other others otherwise ought our ours ourselves out outside over
        overall own

        p particular particularly per perhaps placed please plus possible
        presumably probably provides

        q que quite qv

        r rather rd re really reasonably regarding regardless regards
        relatively respectively right

        s said same saw say saying says second secondly see seeing seem
        seemed seeming seems seen self selves sensible sent serious seriously
        seven several shall she should shouldn't since six so some somebody
        somehow someone something sometime sometimes somewhat somewhere soon
        sorry specified specify specifying still sub such sup sure

        t t's take taken tell tends th than thank thanks thanx that that's
        thats the their theirs them themselves then thence there there's
        thereafter thereby therefore therein theres thereupon these they
        they'd they'll they're they've think third this thorough thoroughly
        those though three through throughout thru thus to together too took
        toward towards tried tries truly try trying twice two

        u un under unfortunately unless unlikely until unto up upon us use
        used useful uses using usually uucp

        v value various very via viz vs

        w want wants was wasn't way we we'd we'll we're we've welcome well
        went were weren't what what's whatever when whence whenever where
        where's whereafter whereas whereby wherein whereupon wherever whether
        which while whither who who's whoever whole whom whose why will
        willing wish with within without won't wonder would wouldn't

        x

        y yes yet you you'd you'll you're you've your yours yourself
        yourselves

        z zero
      ]
    end
  end
end