twitter_ebooks 2.0.3 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +12 -12
- data/bin/ebooks +21 -6
- data/data/ANC-all-count.txt +297241 -0
- data/data/stopwords.txt +204 -0
- data/data/wordfreq.json +1 -0
- data/lib/twitter_ebooks/bot.rb +25 -7
- data/lib/twitter_ebooks/markov.rb +55 -63
- data/lib/twitter_ebooks/model.rb +57 -74
- data/lib/twitter_ebooks/nlp.rb +90 -55
- data/lib/twitter_ebooks/version.rb +1 -1
- data/script/process_anc_data.rb +19 -0
- data/skeleton/Procfile +1 -1
- data/skeleton/bots.rb +0 -6
- data/skeleton/corpus/README.md +1 -1
- data/skeleton/run.rb +9 -0
- data/test/keywords.rb +18 -0
- data/twitter_ebooks.gemspec +3 -5
- metadata +13 -40
- data/skeleton/model/README.md +0 -1
data/data/stopwords.txt
CHANGED
@@ -3,11 +3,17 @@ able
|
|
3
3
|
about
|
4
4
|
above
|
5
5
|
abst
|
6
|
+
accordance
|
7
|
+
according
|
8
|
+
accordingly
|
6
9
|
across
|
7
10
|
act
|
8
11
|
actually
|
9
12
|
added
|
10
13
|
adj
|
14
|
+
affected
|
15
|
+
affecting
|
16
|
+
affects
|
11
17
|
after
|
12
18
|
afterwards
|
13
19
|
again
|
@@ -38,6 +44,7 @@ anyway
|
|
38
44
|
anyways
|
39
45
|
anywhere
|
40
46
|
apparently
|
47
|
+
approximately
|
41
48
|
are
|
42
49
|
aren
|
43
50
|
arent
|
@@ -49,7 +56,9 @@ ask
|
|
49
56
|
asking
|
50
57
|
at
|
51
58
|
auth
|
59
|
+
available
|
52
60
|
away
|
61
|
+
awfully
|
53
62
|
b
|
54
63
|
back
|
55
64
|
be
|
@@ -61,6 +70,10 @@ becoming
|
|
61
70
|
been
|
62
71
|
before
|
63
72
|
beforehand
|
73
|
+
begin
|
74
|
+
beginning
|
75
|
+
beginnings
|
76
|
+
begins
|
64
77
|
behind
|
65
78
|
being
|
66
79
|
believe
|
@@ -83,11 +96,14 @@ cannot
|
|
83
96
|
can't
|
84
97
|
cause
|
85
98
|
causes
|
99
|
+
certain
|
100
|
+
certainly
|
86
101
|
co
|
87
102
|
com
|
88
103
|
come
|
89
104
|
comes
|
90
105
|
contain
|
106
|
+
containing
|
91
107
|
contains
|
92
108
|
could
|
93
109
|
couldnt
|
@@ -126,6 +142,11 @@ et-al
|
|
126
142
|
etc
|
127
143
|
even
|
128
144
|
ever
|
145
|
+
every
|
146
|
+
everybody
|
147
|
+
everyone
|
148
|
+
everything
|
149
|
+
everywhere
|
129
150
|
ex
|
130
151
|
except
|
131
152
|
f
|
@@ -209,8 +230,10 @@ in
|
|
209
230
|
inc
|
210
231
|
indeed
|
211
232
|
index
|
233
|
+
information
|
212
234
|
instead
|
213
235
|
into
|
236
|
+
invention
|
214
237
|
inward
|
215
238
|
is
|
216
239
|
isn't
|
@@ -269,6 +292,7 @@ meanwhile
|
|
269
292
|
merely
|
270
293
|
mg
|
271
294
|
might
|
295
|
+
million
|
272
296
|
miss
|
273
297
|
ml
|
274
298
|
more
|
@@ -296,6 +320,7 @@ need
|
|
296
320
|
needs
|
297
321
|
neither
|
298
322
|
never
|
323
|
+
nevertheless
|
299
324
|
new
|
300
325
|
next
|
301
326
|
nine
|
@@ -434,6 +459,8 @@ showed
|
|
434
459
|
shown
|
435
460
|
showns
|
436
461
|
shows
|
462
|
+
significant
|
463
|
+
significantly
|
437
464
|
similar
|
438
465
|
similarly
|
439
466
|
since
|
@@ -613,11 +640,13 @@ whose
|
|
613
640
|
why
|
614
641
|
widely
|
615
642
|
willing
|
643
|
+
wish
|
616
644
|
with
|
617
645
|
within
|
618
646
|
without
|
619
647
|
won't
|
620
648
|
words
|
649
|
+
world
|
621
650
|
would
|
622
651
|
wouldn't
|
623
652
|
www
|
@@ -631,9 +660,184 @@ you'll
|
|
631
660
|
your
|
632
661
|
youre
|
633
662
|
yours
|
663
|
+
yourself
|
664
|
+
yourselves
|
634
665
|
you've
|
635
666
|
z
|
636
667
|
zero
|
637
668
|
.
|
638
669
|
?
|
639
670
|
!
|
671
|
+
|
672
|
+
http
|
673
|
+
don
|
674
|
+
people
|
675
|
+
well
|
676
|
+
will
|
677
|
+
https
|
678
|
+
time
|
679
|
+
good
|
680
|
+
thing
|
681
|
+
twitter
|
682
|
+
pretty
|
683
|
+
it's
|
684
|
+
i'm
|
685
|
+
that's
|
686
|
+
you're
|
687
|
+
they're
|
688
|
+
there's
|
689
|
+
things
|
690
|
+
yeah
|
691
|
+
find
|
692
|
+
going
|
693
|
+
work
|
694
|
+
point
|
695
|
+
years
|
696
|
+
guess
|
697
|
+
bad
|
698
|
+
problem
|
699
|
+
real
|
700
|
+
kind
|
701
|
+
day
|
702
|
+
better
|
703
|
+
lot
|
704
|
+
stuff
|
705
|
+
i'd
|
706
|
+
read
|
707
|
+
thought
|
708
|
+
idea
|
709
|
+
case
|
710
|
+
word
|
711
|
+
hey
|
712
|
+
person
|
713
|
+
long
|
714
|
+
Dear
|
715
|
+
internet
|
716
|
+
tweet
|
717
|
+
he's
|
718
|
+
feel
|
719
|
+
wrong
|
720
|
+
call
|
721
|
+
hard
|
722
|
+
phone
|
723
|
+
ago
|
724
|
+
literally
|
725
|
+
remember
|
726
|
+
reason
|
727
|
+
called
|
728
|
+
course
|
729
|
+
bit
|
730
|
+
question
|
731
|
+
high
|
732
|
+
today
|
733
|
+
told
|
734
|
+
man
|
735
|
+
actual
|
736
|
+
year
|
737
|
+
three
|
738
|
+
book
|
739
|
+
assume
|
740
|
+
life
|
741
|
+
true
|
742
|
+
best
|
743
|
+
wow
|
744
|
+
video
|
745
|
+
times
|
746
|
+
works
|
747
|
+
fact
|
748
|
+
completely
|
749
|
+
totally
|
750
|
+
imo
|
751
|
+
open
|
752
|
+
lol
|
753
|
+
haha
|
754
|
+
cool
|
755
|
+
yep
|
756
|
+
ooh
|
757
|
+
great
|
758
|
+
ugh
|
759
|
+
tonight
|
760
|
+
talk
|
761
|
+
sounds
|
762
|
+
hahaha
|
763
|
+
whoa
|
764
|
+
cool
|
765
|
+
we're
|
766
|
+
guys
|
767
|
+
sweet
|
768
|
+
fortunately
|
769
|
+
hmm
|
770
|
+
aren't
|
771
|
+
sadly
|
772
|
+
talking
|
773
|
+
you'd
|
774
|
+
place
|
775
|
+
yup
|
776
|
+
what's
|
777
|
+
y'know
|
778
|
+
basically
|
779
|
+
god
|
780
|
+
shit
|
781
|
+
holy
|
782
|
+
interesting
|
783
|
+
news
|
784
|
+
guy
|
785
|
+
wait
|
786
|
+
oooh
|
787
|
+
gonna
|
788
|
+
current
|
789
|
+
let's
|
790
|
+
tomorrow
|
791
|
+
omg
|
792
|
+
hate
|
793
|
+
hope
|
794
|
+
fuck
|
795
|
+
oops
|
796
|
+
night
|
797
|
+
wear
|
798
|
+
wanna
|
799
|
+
fun
|
800
|
+
finally
|
801
|
+
whoops
|
802
|
+
nevermind
|
803
|
+
definitely
|
804
|
+
context
|
805
|
+
screen
|
806
|
+
free
|
807
|
+
exactly
|
808
|
+
big
|
809
|
+
house
|
810
|
+
half
|
811
|
+
working
|
812
|
+
play
|
813
|
+
heard
|
814
|
+
hmmm
|
815
|
+
damn
|
816
|
+
woah
|
817
|
+
tho
|
818
|
+
set
|
819
|
+
idk
|
820
|
+
sort
|
821
|
+
understand
|
822
|
+
kinda
|
823
|
+
seriously
|
824
|
+
btw
|
825
|
+
she's
|
826
|
+
hah
|
827
|
+
aww
|
828
|
+
ffs
|
829
|
+
it'd
|
830
|
+
that'd
|
831
|
+
hopefully
|
832
|
+
non
|
833
|
+
entirely
|
834
|
+
lots
|
835
|
+
entire
|
836
|
+
tend
|
837
|
+
hullo
|
838
|
+
clearly
|
839
|
+
surely
|
840
|
+
weird
|
841
|
+
start
|
842
|
+
help
|
843
|
+
nope
|