sclust 1.0.0 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,96 @@
1
+ #
2
+ # The MIT License
3
+ #
4
+ # Copyright (c) 2010 Samuel R. Baskinger
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ # of this software and associated documentation files (the "Software"), to deal
8
+ # in the Software without restriction, including without limitation the rights
9
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ # copies of the Software, and to permit persons to whom the Software is
11
+ # furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in
14
+ # all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ # THE SOFTWARE.
23
+ #
24
+
25
module SClust

  module Util

    # A Hash that behaves like a sparse vector: entries whose value equals the
    # configured default are never kept in the table, and reading a missing
    # key yields that default.
    #
    #   v = SparseVector.new(0)
    #   v[3] = 7    # stored
    #   v[3] = 0    # equals the default, so the entry is dropped again
    #   v[9]        # => 0
    class SparseVector < Hash

      # default_value:: value reported for absent keys; assigning it to a key
      #                 removes that key instead of storing it.
      def initialize(default_value = nil)
        super(default_value)
        @default_value = default_value
      end

      # Stores +value+ under +key+, unless +value+ equals the default, in which
      # case any existing entry for +key+ is deleted instead.
      #
      # Returns +value+ in both cases.
      def store(key, value)
        if @default_value == value
          # Go through #delete (not Hash#delete directly) so subclasses that
          # override #delete can keep their own bookkeeping consistent.
          delete(key) if member?(key)
          value
        else
          super(key, value)
        end
      end

      # Returns the value stored under +key+, or the default when absent.
      def [](key)
        if key?(key)
          super(key)
        else
          @default_value
        end
      end

      # NOTE: alias binds to this class's #store; a subclass overriding #store
      # does not change what []= calls.
      alias []= store

    end

    # A SparseVector that can additionally attach a label to each key and look
    # keys up by label.
    class SparseLabeledVector < SparseVector

      # Map keys to the user-defined label.
      attr_reader :key_map

      # Map labels to the key the data is stored under.
      attr_reader :label_map

      # default_value:: see SparseVector#initialize.
      def initialize(default_value = nil)
        super(default_value)
        @label_map = {}
        @key_map = {}
      end

      # Stores the (key, value) pair as SparseVector#store does and, when a
      # truthy +label+ is given, records label_map[label] => key and
      # key_map[key] => label.
      #
      # The label is recorded even when +value+ equals the default (so no
      # entry is kept for +key+). Labels can only be supplied through this
      # method: +vec[key] = value+ is bound to SparseVector#store and never
      # records one.
      #
      # Returns +value+, matching Hash#store and SparseVector#store.
      def store(key, value, label = nil)
        super(key, value)

        if label
          @label_map[label] = key
          @key_map[key] = label
        end

        value
      end

      # Removes +key+ and the label bookkeeping that belongs to it.
      #
      # Returns whatever Hash#delete returns: the removed value, or nil when
      # the key was absent.
      def delete(key)
        # Decide on presence, not on the truthiness of the removed value, so
        # entries holding false/nil still get their labels cleaned up.
        was_present = key?(key)
        removed = super(key)

        if was_present
          label = @key_map.delete(key)
          # Only drop the reverse mapping if the label still points at this
          # key; it may have been reassigned to another key since.
          @label_map.delete(label) if label && @label_map[label].eql?(key)
        end

        removed
      end
    end

  end
end
@@ -0,0 +1,1149 @@
1
module SClust
  module Util
    # Holds a fixed list of lowercase English stopwords (common function
    # words, contractions and single letters) in the class variable
    # @@stopword_list.
    #
    # NOTE(review): no accessor is defined here; presumably consumers include
    # this module to reach @@stopword_list -- verify against callers.
    module StopwordList
      # Entries are grouped by initial letter, one group per line; each word
      # is listed exactly once. The array is left unfrozen.
      @@stopword_list = %w(
        a a's able about above according accordingly across actually after afterwards again against ain't all allow allows almost alone along already also although always am among amongst an and another any anybody anyhow anyone anything anyway anyways anywhere apart appear appreciate appropriate are aren't around as aside ask asking associated at available away awfully
        b be became because become becomes becoming been before beforehand behind being believe below beside besides best better between beyond both brief but by
        c c'mon c's came can can't cannot cant cause causes certain certainly changes clearly co com come comes concerning consequently consider considering contain containing contains corresponding could couldn't course currently
        d definitely described despite did didn't different do does doesn't doing don't done down downwards during
        e each edu eg eight either else elsewhere enough entirely especially et etc even ever every everybody everyone everything everywhere ex exactly example except
        f far few fifth first five followed following follows for former formerly forth four from further furthermore
        g get gets getting given gives go goes going gone got gotten greetings
        h had hadn't happens hardly has hasn't have haven't having he he's hello help hence her here here's hereafter hereby herein hereupon hers herself hi him himself his hither hopefully how howbeit however
        i i'd i'll i'm i've ie if ignored immediate in inasmuch inc indeed indicate indicated indicates inner insofar instead into inward is isn't it it'd it'll it's its itself
        j just
        k keep keeps kept know knows known
        l last lately later latter latterly least less lest let let's like liked likely little look looking looks ltd
        m mainly many may maybe me mean meanwhile merely might more moreover most mostly much must my myself
        n name namely nd near nearly necessary need needs neither never nevertheless new next nine no nobody non none noone nor normally not nothing novel now nowhere
        o obviously of off often oh ok okay old on once one ones only onto or other others otherwise ought our ours ourselves out outside over overall own
        p particular particularly per perhaps placed please plus possible presumably probably provides
        q que quite qv
        r rather rd re really reasonably regarding regardless regards relatively respectively right
        s said same saw say saying says second secondly see seeing seem seemed seeming seems seen self selves sensible sent serious seriously seven several shall she should shouldn't since six so some somebody somehow someone something sometime sometimes somewhat somewhere soon sorry specified specify specifying still sub such sup sure
        t t's take taken tell tends th than thank thanks thanx that that's thats the their theirs them themselves then thence there there's thereafter thereby therefore therein theres thereupon these they they'd they'll they're they've think third this thorough thoroughly those though three through throughout thru thus to together too took toward towards tried tries truly try trying twice two
        u un under unfortunately unless unlikely until unto up upon us use used useful uses using usually uucp
        v value various very via viz vs
        w want wants was wasn't way we we'd we'll we're we've welcome well went were weren't what what's whatever when whence whenever where where's whereafter whereas whereby wherein whereupon wherever whether which while whither who who's whoever whole whom whose why will willing wish with within without won't wonder would wouldn't
        x
        y yes yet you you'd you'll you're you've your yours yourself yourselves
        z zero
      )
    end
  end
end