twitter_ebooks 3.0.8 → 3.0.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/bin/ebooks +46 -0
- data/lib/twitter_ebooks/model.rb +1 -1
- data/lib/twitter_ebooks/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3f4312e16bb18ae646bbca7559d3044ecc7b00f7
|
4
|
+
data.tar.gz: 9fdcb9b3eb8cf9fc4449699ed20661a8b5ef8d2a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 39aee9bc75ffe627b24d1f6ae29f7bfd1c3093e3295dfebf5fdafb67b8ed649f417811112c5086f50449bb0a21ee899e7e3a5896e76a0de32a524da548e05a39
|
7
|
+
data.tar.gz: 8136aec89545928bd5a04dd94fd14c078752a913f098a32a6ee96fd2c065f9d03ed14dcdc32550ce17d9f3815aaaee20153b2e24f617903e34502faa85e19a3b
|
data/README.md
CHANGED
@@ -93,7 +93,7 @@ The underlying streaming and REST clients from the [twitter gem](https://github.
|
|
93
93
|
|
94
94
|
## Archiving accounts
|
95
95
|
|
96
|
-
twitter\_ebooks comes with a syncing tool to download and then incrementally update a local json archive of a user's tweets
|
96
|
+
twitter\_ebooks comes with a syncing tool to download and then incrementally update a local json archive of a user's tweets (in this case, my good friend @0xabad1dea):
|
97
97
|
|
98
98
|
``` zsh
|
99
99
|
➜ ebooks archive 0xabad1dea corpus/0xabad1dea.json
|
data/bin/ebooks
CHANGED
@@ -114,6 +114,52 @@ STR
|
|
114
114
|
log "Corpuses consumed to #{outpath}"
|
115
115
|
end
|
116
116
|
|
117
|
+
HELP.jsonify = <<-STR
|
118
|
+
Usage: ebooks jsonify <tweets.csv> [tweets.csv2] [...]
|
119
|
+
|
120
|
+
Takes a csv twitter archive and converts it to json.
|
121
|
+
STR
|
122
|
+
|
123
|
+
def self.jsonify(paths)
|
124
|
+
if paths.empty?
|
125
|
+
log usage
|
126
|
+
exit
|
127
|
+
end
|
128
|
+
|
129
|
+
paths.each do |path|
|
130
|
+
name = File.basename(path).split('.')[0]
|
131
|
+
new_path = name + ".json"
|
132
|
+
|
133
|
+
tweets = []
|
134
|
+
id = nil
|
135
|
+
if path.split('.')[-1] == "csv" #from twitter archive
|
136
|
+
csv_archive = CSV.read(path, :headers=>:first_row)
|
137
|
+
tweets = csv_archive.map do |tweet|
|
138
|
+
{ text: tweet['text'], id: tweet['tweet_id'] }
|
139
|
+
end
|
140
|
+
else
|
141
|
+
File.read(path).split("\n").each do |l|
|
142
|
+
if l.start_with?('# ')
|
143
|
+
id = l.split('# ')[-1]
|
144
|
+
else
|
145
|
+
tweet = { text: l }
|
146
|
+
if id
|
147
|
+
tweet[:id] = id
|
148
|
+
id = nil
|
149
|
+
end
|
150
|
+
tweets << tweet
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
File.open(new_path, 'w') do |f|
|
156
|
+
log "Writing #{tweets.length} tweets to #{new_path}"
|
157
|
+
f.write(JSON.pretty_generate(tweets))
|
158
|
+
end
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
162
|
+
|
117
163
|
HELP.gen = <<-STR
|
118
164
|
Usage: ebooks gen <model_path> [input]
|
119
165
|
|
data/lib/twitter_ebooks/model.rb
CHANGED
@@ -118,7 +118,7 @@ module Ebooks
|
|
118
118
|
tweet[text_col]
|
119
119
|
end
|
120
120
|
else
|
121
|
-
log "Reading plaintext corpus from #{path}"
|
121
|
+
log "Reading plaintext corpus from #{path} (if this is a json or csv file, please rename the file with an extension and reconsume)"
|
122
122
|
lines = content.split("\n")
|
123
123
|
end
|
124
124
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter_ebooks
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.8
|
4
|
+
version: 3.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaiden Mispy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-03-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|