twitter_ebooks 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,639 @@
1
+ a
2
+ able
3
+ about
4
+ above
5
+ abst
6
+ across
7
+ act
8
+ actually
9
+ added
10
+ adj
11
+ after
12
+ afterwards
13
+ again
14
+ against
15
+ ah
16
+ all
17
+ almost
18
+ alone
19
+ along
20
+ already
21
+ also
22
+ although
23
+ always
24
+ am
25
+ among
26
+ amongst
27
+ an
28
+ and
29
+ announce
30
+ another
31
+ any
32
+ anybody
33
+ anyhow
34
+ anymore
35
+ anyone
36
+ anything
37
+ anyway
38
+ anyways
39
+ anywhere
40
+ apparently
41
+ are
42
+ aren
43
+ arent
44
+ arise
45
+ around
46
+ as
47
+ aside
48
+ ask
49
+ asking
50
+ at
51
+ auth
52
+ away
53
+ b
54
+ back
55
+ be
56
+ became
57
+ because
58
+ become
59
+ becomes
60
+ becoming
61
+ been
62
+ before
63
+ beforehand
64
+ behind
65
+ being
66
+ believe
67
+ below
68
+ beside
69
+ besides
70
+ between
71
+ beyond
72
+ biol
73
+ both
74
+ brief
75
+ briefly
76
+ but
77
+ by
78
+ c
79
+ ca
80
+ came
81
+ can
82
+ cannot
83
+ can't
84
+ cause
85
+ causes
86
+ co
87
+ com
88
+ come
89
+ comes
90
+ contain
91
+ contains
92
+ could
93
+ couldnt
94
+ d
95
+ date
96
+ did
97
+ didn't
98
+ different
99
+ do
100
+ does
101
+ doesn't
102
+ doing
103
+ done
104
+ don't
105
+ down
106
+ downwards
107
+ due
108
+ during
109
+ e
110
+ each
111
+ ed
112
+ edu
113
+ effect
114
+ eg
115
+ eight
116
+ eighty
117
+ either
118
+ else
119
+ elsewhere
120
+ end
121
+ ending
122
+ enough
123
+ especially
124
+ et
125
+ et-al
126
+ etc
127
+ even
128
+ ever
129
+ ex
130
+ except
131
+ f
132
+ far
133
+ few
134
+ ff
135
+ fifth
136
+ first
137
+ five
138
+ fix
139
+ followed
140
+ following
141
+ follows
142
+ for
143
+ former
144
+ formerly
145
+ forth
146
+ found
147
+ four
148
+ from
149
+ further
150
+ furthermore
151
+ g
152
+ gave
153
+ get
154
+ gets
155
+ getting
156
+ give
157
+ given
158
+ gives
159
+ giving
160
+ go
161
+ goes
162
+ gone
163
+ got
164
+ gotten
165
+ h
166
+ had
167
+ happens
168
+ hardly
169
+ has
170
+ hasn't
171
+ have
172
+ haven't
173
+ having
174
+ he
175
+ hed
176
+ hence
177
+ her
178
+ here
179
+ hereafter
180
+ hereby
181
+ herein
182
+ heres
183
+ hereupon
184
+ hers
185
+ herself
186
+ hes
187
+ hi
188
+ hid
189
+ him
190
+ himself
191
+ his
192
+ hither
193
+ home
194
+ how
195
+ howbeit
196
+ however
197
+ hundred
198
+ i
199
+ id
200
+ ie
201
+ if
202
+ i'll
203
+ im
204
+ immediate
205
+ immediately
206
+ importance
207
+ important
208
+ in
209
+ inc
210
+ indeed
211
+ index
212
+ instead
213
+ into
214
+ inward
215
+ is
216
+ isn't
217
+ it
218
+ itd
219
+ it'll
220
+ its
221
+ itself
222
+ i've
223
+ j
224
+ just
225
+ k
226
+ keep
227
+ keeps
228
+ kept
229
+ kg
230
+ km
231
+ know
232
+ known
233
+ knows
234
+ l
235
+ largely
236
+ last
237
+ lately
238
+ later
239
+ latter
240
+ latterly
241
+ least
242
+ less
243
+ lest
244
+ let
245
+ lets
246
+ like
247
+ liked
248
+ likely
249
+ line
250
+ little
251
+ 'll
252
+ look
253
+ looking
254
+ looks
255
+ ltd
256
+ m
257
+ made
258
+ mainly
259
+ make
260
+ makes
261
+ many
262
+ may
263
+ maybe
264
+ me
265
+ mean
266
+ means
267
+ meantime
268
+ meanwhile
269
+ merely
270
+ mg
271
+ might
272
+ miss
273
+ ml
274
+ more
275
+ moreover
276
+ most
277
+ mostly
278
+ mr
279
+ mrs
280
+ much
281
+ mug
282
+ must
283
+ my
284
+ myself
285
+ n
286
+ na
287
+ name
288
+ namely
289
+ nay
290
+ nd
291
+ near
292
+ nearly
293
+ necessarily
294
+ necessary
295
+ need
296
+ needs
297
+ neither
298
+ never
299
+ new
300
+ next
301
+ nine
302
+ ninety
303
+ no
304
+ nobody
305
+ non
306
+ none
307
+ nonetheless
308
+ noone
309
+ nor
310
+ normally
311
+ nos
312
+ not
313
+ noted
314
+ nothing
315
+ now
316
+ nowhere
317
+ o
318
+ obtain
319
+ obtained
320
+ obviously
321
+ of
322
+ off
323
+ often
324
+ oh
325
+ ok
326
+ okay
327
+ old
328
+ omitted
329
+ on
330
+ once
331
+ one
332
+ ones
333
+ only
334
+ onto
335
+ or
336
+ ord
337
+ other
338
+ others
339
+ otherwise
340
+ ought
341
+ our
342
+ ours
343
+ ourselves
344
+ out
345
+ outside
346
+ over
347
+ overall
348
+ owing
349
+ own
350
+ p
351
+ page
352
+ pages
353
+ part
354
+ particular
355
+ particularly
356
+ past
357
+ per
358
+ perhaps
359
+ placed
360
+ please
361
+ plus
362
+ poorly
363
+ possible
364
+ possibly
365
+ potentially
366
+ pp
367
+ predominantly
368
+ present
369
+ previously
370
+ primarily
371
+ probably
372
+ promptly
373
+ proud
374
+ provides
375
+ put
376
+ q
377
+ que
378
+ quickly
379
+ quite
380
+ qv
381
+ r
382
+ ran
383
+ rather
384
+ rd
385
+ re
386
+ readily
387
+ really
388
+ recent
389
+ recently
390
+ ref
391
+ refs
392
+ regarding
393
+ regardless
394
+ regards
395
+ related
396
+ relatively
397
+ research
398
+ respectively
399
+ resulted
400
+ resulting
401
+ results
402
+ right
403
+ run
404
+ s
405
+ said
406
+ same
407
+ saw
408
+ say
409
+ saying
410
+ says
411
+ sec
412
+ section
413
+ see
414
+ seeing
415
+ seem
416
+ seemed
417
+ seeming
418
+ seems
419
+ seen
420
+ self
421
+ selves
422
+ sent
423
+ seven
424
+ several
425
+ shall
426
+ she
427
+ shed
428
+ she'll
429
+ shes
430
+ should
431
+ shouldn't
432
+ show
433
+ showed
434
+ shown
435
+ showns
436
+ shows
437
+ similar
438
+ similarly
439
+ since
440
+ six
441
+ slightly
442
+ so
443
+ some
444
+ somebody
445
+ somehow
446
+ someone
447
+ somethan
448
+ something
449
+ sometime
450
+ sometimes
451
+ somewhat
452
+ somewhere
453
+ soon
454
+ sorry
455
+ specifically
456
+ specified
457
+ specify
458
+ specifying
459
+ still
460
+ stop
461
+ strongly
462
+ sub
463
+ substantially
464
+ successfully
465
+ such
466
+ sufficiently
467
+ suggest
468
+ sup
469
+ sure
470
+ t
471
+ take
472
+ taken
473
+ taking
474
+ tell
475
+ tends
476
+ th
477
+ than
478
+ thank
479
+ thanks
480
+ thanx
481
+ that
482
+ that'll
483
+ thats
484
+ that've
485
+ the
486
+ their
487
+ theirs
488
+ them
489
+ themselves
490
+ then
491
+ thence
492
+ there
493
+ thereafter
494
+ thereby
495
+ thered
496
+ therefore
497
+ therein
498
+ there'll
499
+ thereof
500
+ therere
501
+ theres
502
+ thereto
503
+ thereupon
504
+ there've
505
+ these
506
+ they
507
+ theyd
508
+ they'll
509
+ theyre
510
+ they've
511
+ think
512
+ this
513
+ those
514
+ thou
515
+ though
516
+ thoughh
517
+ thousand
518
+ throug
519
+ through
520
+ throughout
521
+ thru
522
+ thus
523
+ til
524
+ tip
525
+ to
526
+ together
527
+ too
528
+ took
529
+ toward
530
+ towards
531
+ tried
532
+ tries
533
+ truly
534
+ try
535
+ trying
536
+ ts
537
+ twice
538
+ two
539
+ u
540
+ un
541
+ under
542
+ unfortunately
543
+ unless
544
+ unlike
545
+ unlikely
546
+ until
547
+ unto
548
+ up
549
+ upon
550
+ ups
551
+ us
552
+ use
553
+ used
554
+ useful
555
+ usefully
556
+ usefulness
557
+ uses
558
+ using
559
+ usually
560
+ v
561
+ value
562
+ various
563
+ 've
564
+ very
565
+ via
566
+ viz
567
+ vol
568
+ vols
569
+ vs
570
+ w
571
+ want
572
+ wants
573
+ was
574
+ wasn't
575
+ way
576
+ we
577
+ wed
578
+ welcome
579
+ we'll
580
+ went
581
+ were
582
+ weren't
583
+ we've
584
+ what
585
+ whatever
586
+ what'll
587
+ whats
588
+ when
589
+ whence
590
+ whenever
591
+ where
592
+ whereafter
593
+ whereas
594
+ whereby
595
+ wherein
596
+ wheres
597
+ whereupon
598
+ wherever
599
+ whether
600
+ which
601
+ while
602
+ whim
603
+ whither
604
+ who
605
+ whod
606
+ whoever
607
+ whole
608
+ who'll
609
+ whom
610
+ whomever
611
+ whos
612
+ whose
613
+ why
614
+ widely
615
+ willing
616
+ with
617
+ within
618
+ without
619
+ won't
620
+ words
621
+ would
622
+ wouldn't
623
+ www
624
+ x
625
+ y
626
+ yes
627
+ yet
628
+ you
629
+ youd
630
+ you'll
631
+ your
632
+ youre
633
+ yours
634
+ you've
635
+ z
636
+ zero
637
+ .
638
+ ?
639
+ !
@@ -0,0 +1,86 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require 'twitter'
5
+ require 'tweetstream'
6
+ require 'rufus/scheduler'
7
+ require 'marky_markov'
8
+ require 'htmlentities'
9
+
10
module Ebooks
  # Maintains a plain-text archive ("corpus") of one user's tweets.
  #
  # Layout of the corpus file as written by #fetch_tweets: the first line is
  # a header of the form "# <tweet id>" holding the id of the newest archived
  # tweet, followed by one tweet per line with the most recent tweets first.
  class Archiver
    # Matches the header line written at the top of the corpus file.
    # Anchored and digits-only so that an archived tweet which merely starts
    # with a hashtag is not mistaken for the header.
    HEADER_PATTERN = /\A# (\d+)\s*\z/

    # username - screen name of the account whose timeline is archived
    # outpath  - path of the corpus file that is read and rewritten
    def initialize(username, outpath)
      @username = username
      @outpath = outpath
      @client = Twitter::Client.new
    end

    # Read the existing corpus into memory.
    #
    # Returns a two-element array: every line of the corpus file (including
    # the header line when there is one) and the tweet id stored in the
    # header as a String. The id is nil when the file is missing, empty, or
    # does not begin with a header line.
    def read_corpus
      return [[], nil] unless File.exist?(@outpath)

      lines = File.read(@outpath).split("\n")
      [lines, header_id(lines.first)]
    end

    # Retrieve all available tweets for the user that are newer than
    # since_id (or every tweet the API offers when since_id is nil).
    #
    # Pages backwards through the timeline until an empty page comes back.
    # Returns the tweets in the order received across pages.
    def tweets_since(since_id)
      tweets = []
      opts = {
        count: 200,
        include_rts: false,
        trim_user: true
      }
      opts[:since_id] = since_id unless since_id.nil?

      loop do
        batch = @client.user_timeline(@username, opts)
        break if batch.empty?

        puts "Received #{batch.length} tweets"
        tweets.concat(batch)
        # max_id is inclusive on the API side, so ask for tweets strictly
        # older than the last one received to avoid fetching it twice.
        opts[:max_id] = batch.last.id - 1
      end

      tweets
    end

    # Fetch tweets newer than the ones already archived and rewrite the
    # corpus file with the new tweets placed ahead of the old ones.
    #
    # Newlines inside a tweet are replaced by spaces so that each tweet
    # occupies exactly one line. Returns nil.
    def fetch_tweets
      lines, since_id = read_corpus

      if since_id.nil?
        puts "Retrieving tweets from @#{@username}"
      else
        puts "Retrieving tweets from @#{@username} since #{since_id}"
      end

      tweets = tweets_since(since_id)

      if tweets.empty?
        puts "No new tweets"
        return
      end

      new_lines = tweets.map { |tweet| tweet.text.gsub("\n", " ") }
      # Drop the previous header; otherwise every run would leave a stale
      # "# <id>" line buried in the middle of the corpus.
      old_lines = since_id.nil? ? lines : lines.drop(1)
      header = "# #{tweets.first.id}"

      # Block form closes the file even if the write raises.
      File.open(@outpath, 'w') do |corpus|
        corpus.write(([header] + new_lines + old_lines).join("\n"))
      end

      nil
    end

    private

    # Extract the tweet id from a header line; nil if the line is not one.
    def header_id(line)
      match = HEADER_PATTERN.match(line.to_s)
      match && match[1]
    end
  end
end